Model

library(DiagrammeR) 
# Figure 1: causal DAG rendered from a Graphviz DOT spec via DiagrammeR::grViz().
# Node ids used in the spec (taken from the labels below):
#   a = Observed Confounders (Z), b = Unobserved Confounders (U),
#   c = Early Drop-out (Y),       d = Residential Programs (X).
# NOTE(review): the spec draws `d -> a`, i.e. X -> Z, whereas gr2 further down
# draws Z -> X — confirm which direction is intended for this figure.
# NOTE(review): `{ rankdir = LR; a; d }` sets rankdir inside a subgraph;
# rankdir is presumably only honoured at graph level — confirm it has an effect.
# Unused node definitions kept for reference (selection S and censoring C):
 #node [shape = box]
 # S [label = 'Matched\n(S=1)',fontsize=7]
 # C [label = 'Not censored\n(C=0)',fontsize=7]
gr1<-
DiagrammeR::grViz("
digraph causal {

# Nodes
  node [shape = plaintext]
  a [label = 'Observed\nConfounders\n(Z)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Residential\nPrograms\n(X)',fontsize=10]

# Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB;
  
  b -> c 
  b -> a 
  a -> c  

  d -> c [minlen=1]
  d -> a [minlen=1]
  
 # a -> S #[minlen=1]
 # Z -> S #[minlen=1]
  
#  a -> C #[minlen=3]
#  Z -> C #[minlen=3]
  { rank = same; b; a; c }
# { rank = same; S; C }
  { rankdir = LR; a; d }

# Graph
  graph [overlap = true]
}")
# Print the diagram object so it is rendered in the output document.
gr1

Figure 1. Directed Acyclic Graph

# Layout snippets kept for reference (forcing nodes onto the same rank and
# drawing a reversed edge with dir=back):
#  {rank=same ; A -> B -> C -> D};
#       {rank=same ;           F -> E[dir=back]};
# Background reading on DAGs for matched cohort designs:
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
# Quoted figure captions from that literature:
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/
# Alternative DAG specification. Node ids differ from gr1 (taken from the
# labels below): a = Residential Programs (X), b = Unobserved Confounders (U),
# c = Early Drop-out (Y), d = Observed Confounders (Z).
# Edges drawn: X -> Y, Z -> X, Z -> Y, U -> X, U -> Y.
# NOTE(review): gr2 is assigned but not printed in the visible part of this
# script — confirm whether it is rendered elsewhere or is leftover.
gr2<-
DiagrammeR::grViz("
digraph causal {

  # Nodes
  node [shape = plaintext]
  a [label = 'Residential\nPrograms\n(X)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Observed\nConfounders\n(Z)',fontsize=10]

  # Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB
  a -> c [minlen=3]
  d -> a [minlen=3]
  d -> c [minlen=9]
  
  b -> a [minlen=1]
  b -> c
  
{ rank = same; c; d }
#{ rank = same; b; d }
  rankdir = TB
{ rank = same; d; c } #Ver si lo saco, creo que da problemas
  
  # Graph
  graph [overlap = true]
}") # "LR": presumably a reminder of the alternative left-to-right rankdir; not applied

Balance

We selected treatments at baseline for each user, leaving 85,048 observations. Then, we distinguished between residential (12,706) and ambulatory (72,267) treatments. We imputed the treatment type for the 75 cases that did not have a defined treatment assigned.


We selected the following variables of interest:

  • “Starting Substance” (sus_ini_mod_mvv)
  • “Marital Status” (estado_conyugal_2)
  • “Educational Attainment” (escolaridad_rec)
  • “Age of Onset of Drug Use” (edad_ini_cons)
  • “Frequency of use of primary drug” (freq_cons_sus_prin)
  • “Motive of Admission to Treatment” (origen_ingreso_mod)
  • “Psychiatric co-morbidity” (dg_cie_10_rec)
  • “Drug Dependence” (dg_trs_cons_sus_or)
  • “Chilean Region of the Center” (nombre_region)
  • “Type of Center (Public)” (tipo_centro_pub)
  • “Sex” (sexo_2)
  • “Age at Admission to Treatment” (edad_al_ing)
  • “Date of Admission to Treatment” (fech_ing_num)
  • “Evaluation of the Therapeutic Process” (*) (evaluacindelprocesoteraputico)
  • “Early Dropout (Against Staff Advice)” (abandono_temprano_rec) (Y)
  • “Residential Type of Plan” (tipo_de_plan_res) (X)


library(compareGroups)

# Columns carried into the balance/matching dataset, in this exact order:
# identifiers (row, hash_key), baseline covariates, the outcome
# (abandono_temprano_rec), the exposure (tipo_de_plan_res) and the number of
# treatments per user (duplicates_filtered). The order matters downstream:
# the correlation-matrix labels are assigned by column position.
match.on_tot <- c(
  "row", "hash_key", "sus_ini_mod_mvv", "estado_conyugal_2",
  "escolaridad_rec", "edad_ini_cons", "freq_cons_sus_prin",
  "origen_ingreso_mod", "dg_cie_10_rec", "nombre_region", "tipo_centro_pub",
  "sexo_2", "edad_al_ing", "fech_ing_num", "abandono_temprano_rec",
  "tipo_de_plan_res", "duplicates_filtered", "dg_trs_cons_sus_or",
  "evaluacindelprocesoteraputico"
)

# Build the analysis dataset: keep rows with dup == 1, recode exposure, outcome
# and covariates, keep only the columns in `match.on_tot`, and flag users with
# more than one treatment.
CONS_C1_df_dup_SEP_2020_match <-
  CONS_C1_df_dup_SEP_2020 %>%
  # A stricter filter on the plan codes was considered and left disabled:
  # tipo_de_plan_2 %in% c("PG-PR","M-PR","PG-PAI","M-PAI","PG-PAB","M-PAB")
  dplyr::filter(dup == 1) %>%
  # Exposure: 1 when the plan code contains "PR", 0 when it contains "PAI" or
  # "PAB", NA otherwise; stored as a factor with levels "0" < "1".
  dplyr::mutate(
    tipo_de_plan_res = dplyr::case_when(
      grepl("PR", as.character(tipo_de_plan_2)) ~ 1,
      grepl("PAI", as.character(tipo_de_plan_2)) ~ 0,
      grepl("PAB", as.character(tipo_de_plan_2)) ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  dplyr::mutate(tipo_de_plan_res = factor(tipo_de_plan_res)) %>%
  # Binary recodes (TRUE/FALSE factors; NA stays NA).
  dplyr::mutate(
    abandono_temprano_rec = factor(
      if_else(as.character(motivodeegreso_mod_imp) == "Early Drop-out", TRUE, FALSE, NA)
    )
  ) %>%
  dplyr::mutate(
    dg_trs_cons_sus_or = factor(
      if_else(as.character(dg_trs_cons_sus_or) == "Drug dependence", TRUE, FALSE, NA)
    )
  ) %>%
  dplyr::mutate(
    tipo_centro_pub = factor(
      if_else(as.character(tipo_centro) == "Public", TRUE, FALSE, NA)
    )
  ) %>%
  # The next two recodes are computed but not listed in `match.on_tot`, so the
  # select() below drops them again.
  dplyr::mutate(
    condicion_ocupacional_corr = factor(condicion_ocupacional_corr),
    cat_ocupacional_corr = factor(cat_ocupacional_corr)
  ) %>%
  dplyr::mutate(
    dg_trs_fis_rec = factor(dplyr::case_when(
      as.character(diagnostico_trs_fisico) == "En estudio" ~ "Diagnosis unknown (under study)",
      as.character(diagnostico_trs_fisico) == "Sin trastorno" ~ 'Without physical comorbidity',
      cnt_diagnostico_trs_fisico > 0 ~ 'With physical comorbidity',
      TRUE ~ NA_character_
    ))
  ) %>%
  # Ordered factors with explicit level order.
  dplyr::mutate(
    escolaridad_rec = parse_factor(
      as.character(escolaridad_rec),
      levels = c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'),
      ordered = TRUE, trim_ws = TRUE, include_na = FALSE,
      locale = locale(encoding = "Latin1")
    )
  ) %>%
  # NOTE(review): '1 day a week or more' is ranked above '4 to 6 days a week';
  # confirm this is the intended ordinal order before relying on it.
  dplyr::mutate(
    freq_cons_sus_prin = parse_factor(
      as.character(freq_cons_sus_prin),
      levels = c('Did not use', 'Less than 1 day a week', '2 to 3 days a week', '4 to 6 days a week', '1 day a week or more', 'Daily'),
      ordered = TRUE, trim_ws = TRUE, include_na = FALSE,
      locale = locale(encoding = "UTF-8")
    )
  ) %>%
  # Map any value containing "1"/"2"/"3" to the English achievement label;
  # other values are kept as-is and become NA in parse_factor() below.
  dplyr::mutate(
    evaluacindelprocesoteraputico = dplyr::case_when(
      grepl("1", as.character(evaluacindelprocesoteraputico)) ~ '1-High Achievement',
      grepl("2", as.character(evaluacindelprocesoteraputico)) ~ '2-Medium Achievement',
      grepl("3", as.character(evaluacindelprocesoteraputico)) ~ '3-Minimum Achievement',
      TRUE ~ as.character(evaluacindelprocesoteraputico)
    )
  ) %>%
  dplyr::mutate(
    evaluacindelprocesoteraputico = parse_factor(
      as.character(evaluacindelprocesoteraputico),
      levels = c('1-High Achievement', '2-Medium Achievement', '3-Minimum Achievement'),
      ordered = TRUE, trim_ws = TRUE, include_na = FALSE,
      locale = locale(encoding = "UTF-8")
    )
  ) %>%
  # select_() is deprecated since dplyr 0.7.0; all_of() selects the same
  # columns in the same order and errors if any name is missing.
  dplyr::select(dplyr::all_of(match.on_tot)) %>%
  # 1 when the user has more than one treatment in the database, else 0.
  dplyr::mutate(more_one_treat = factor(ifelse(duplicates_filtered > 1, 1, 0))) %>%
  data.table::data.table()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Disabled exploratory check: quartiles of days in treatment by physical
# diagnosis category.
#CONS_C1_df_dup_SEP_2020_match %>% 
  #dplyr::group_by(dg_trs_fis) %>% dplyr::summarise(q1=quantile(dias_treat_imp_sin_na,.25),q2=quantile(dias_treat_imp_sin_na,.5),q3=quantile(dias_treat_imp_sin_na,.75)) ---> the distributions of days in treatment tend to be quite similar across response categories, although those with a defined physical comorbidity stay longer in the study.
# Analyst note stored as a string (no effect at runtime). English summary: the
# difference in days in treatment across psychiatric-comorbidity categories
# indicates that users "under study" have far fewer days in treatment than
# users without a comorbidity or with a defined one; this is not the case for
# physical illness, where the categories tend to be quite similar.
invisible("La diferencia en días de tratamiento entre las categorías de enfermedad psiquiátrica, indica que quienes se encuentran en estudio tienen muchos menos días en tratamiento que quienes no tienen una comorbilidad o quienes tienen una definida. No es lo mismo con el caso de la enfermedad física, en donde tienden a ser bastante similares")

# Analyst note stored as a string (no effect at runtime). English summary: the
# physical-illness diagnosis was left out because some conditions are (or may
# be) chronic and cannot be validated over the course of treatment.
invisible("Decidí no incluir diagnóstico de enferemedad física, porque hay algunas condiciones que son crónicas o que pueden serlo, y que no tengo cómo validarlas a lo largo del tratamiento")
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

# Attach a human-readable "label" attribute to each analysis column of
# CONS_C1_df_dup_SEP_2020_match. One assignment per column; the label text is
# what appears as the row heading in the descriptive table printed further down.
attr(CONS_C1_df_dup_SEP_2020_match$sus_ini_mod_mvv,"label")<-"Starting Substance"
attr(CONS_C1_df_dup_SEP_2020_match$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons,"label")<-"Age of Onset of Drug Use"
attr(CONS_C1_df_dup_SEP_2020_match$freq_cons_sus_prin,"label")<-"Frequency of use of primary drug"
attr(CONS_C1_df_dup_SEP_2020_match$origen_ingreso_mod,"label")<-"Motive of Admission to Treatment"
attr(CONS_C1_df_dup_SEP_2020_match$dg_cie_10_rec,"label")<-"Psychiatric co-morbidity"
attr(CONS_C1_df_dup_SEP_2020_match$nombre_region,"label")<-"Chilean Region of the Center"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_centro_pub,"label")<-"Type of Center (Public)"
attr(CONS_C1_df_dup_SEP_2020_match$sexo_2,"label")<-"Sex"
attr(CONS_C1_df_dup_SEP_2020_match$edad_al_ing,"label")<-"Age at Admission"
attr(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,"label")<-"Date of Admission to Treatment"
attr(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,"label")<-"Early Dropout"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res,"label")<-"Residential Type of Plan"
attr(CONS_C1_df_dup_SEP_2020_match$duplicates_filtered,"label")<-"No. of Treatments in the Database"
attr(CONS_C1_df_dup_SEP_2020_match$dg_trs_cons_sus_or,"label")<-"Drug Dependence"
attr(CONS_C1_df_dup_SEP_2020_match$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"

# Set knitr chunk defaults: do not echo code, and hide warnings and messages.
knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE)

# Baseline descriptives by treatment setting (tipo_de_plan_res) for all users.
# `method` codes: 3 = analyse as categorical, 2 = continuous non-normal
# (reported as median and quartiles).
# dg_trs_cons_sus_or used to appear twice in the formula (and twice in
# `method`), which printed the "Drug Dependence" rows twice in Table 1; each
# variable is now listed once, keeping its first position.
table1_all <- suppressWarnings(
  compareGroups(
    tipo_de_plan_res ~ sus_ini_mod_mvv + estado_conyugal_2 + escolaridad_rec +
      edad_ini_cons + freq_cons_sus_prin + origen_ingreso_mod + dg_cie_10_rec +
      nombre_region + tipo_centro_pub + sexo_2 + dg_trs_cons_sus_or +
      edad_al_ing + fech_ing_num + abandono_temprano_rec +
      duplicates_filtered + evaluacindelprocesoteraputico,
    method = c(
      sus_ini_mod_mvv = 3,
      estado_conyugal_2 = 3,
      escolaridad_rec = 3,
      edad_ini_cons = 3,
      freq_cons_sus_prin = 3,
      origen_ingreso_mod = 3,
      dg_cie_10_rec = 3,
      nombre_region = 3,
      tipo_centro_pub = 3,
      sexo_2 = 3,
      dg_trs_cons_sus_or = 3,
      edad_al_ing = 2,
      fech_ing_num = 2,
      abandono_temprano_rec = 3,
      duplicates_filtered = 3,
      evaluacindelprocesoteraputico = 3
    ),
    data = CONS_C1_df_dup_SEP_2020_match,
    include.miss = TRUE, # report missing values as their own category
    var.equal = TRUE
  )
)
# Baseline descriptives by treatment setting, restricted to users with more
# than one treatment in the database (more_one_treat == 1).
# `method` codes: 3 = analyse as categorical, 2 = continuous non-normal.
# dg_trs_cons_sus_or used to be listed twice in the formula and in `method`,
# which repeats its rows in the resulting table; it is now listed once,
# keeping its first position.
table1_more_one <- suppressWarnings(
  compareGroups(
    tipo_de_plan_res ~ sus_ini_mod_mvv + estado_conyugal_2 + escolaridad_rec +
      edad_ini_cons + freq_cons_sus_prin + origen_ingreso_mod + dg_cie_10_rec +
      dg_trs_cons_sus_or + nombre_region + tipo_centro_pub + sexo_2 +
      edad_al_ing + fech_ing_num + abandono_temprano_rec +
      evaluacindelprocesoteraputico,
    method = c(
      sus_ini_mod_mvv = 3,
      estado_conyugal_2 = 3,
      escolaridad_rec = 3,
      edad_ini_cons = 3,
      freq_cons_sus_prin = 3,
      origen_ingreso_mod = 3,
      dg_cie_10_rec = 3,
      dg_trs_cons_sus_or = 3,
      nombre_region = 3,
      tipo_centro_pub = 3,
      sexo_2 = 3,
      edad_al_ing = 2,
      fech_ing_num = 2,
      abandono_temprano_rec = 3,
      evaluacindelprocesoteraputico = 3
    ),
    data = CONS_C1_df_dup_SEP_2020_match,
    include.miss = TRUE, # report missing values as their own category
    var.equal = TRUE,
    subset = more_one_treat == 1
  )
)
# Baseline descriptives by treatment setting, restricted to users with a
# single treatment in the database (more_one_treat == 0).
# `method` codes: 3 = analyse as categorical, 2 = continuous non-normal.
# dg_trs_cons_sus_or used to be listed twice in the formula and in `method`,
# which repeats its rows in the resulting table; it is now listed once,
# keeping its first position.
table1_only_one <- suppressWarnings(
  compareGroups(
    tipo_de_plan_res ~ sus_ini_mod_mvv + estado_conyugal_2 + escolaridad_rec +
      edad_ini_cons + freq_cons_sus_prin + origen_ingreso_mod + dg_cie_10_rec +
      dg_trs_cons_sus_or + nombre_region + tipo_centro_pub + sexo_2 +
      edad_al_ing + fech_ing_num + abandono_temprano_rec +
      evaluacindelprocesoteraputico,
    method = c(
      sus_ini_mod_mvv = 3,
      estado_conyugal_2 = 3,
      escolaridad_rec = 3,
      edad_ini_cons = 3,
      freq_cons_sus_prin = 3,
      origen_ingreso_mod = 3,
      dg_cie_10_rec = 3,
      dg_trs_cons_sus_or = 3,
      nombre_region = 3,
      tipo_centro_pub = 3,
      sexo_2 = 3,
      edad_al_ing = 2,
      fech_ing_num = 2,
      abandono_temprano_rec = 3,
      evaluacindelprocesoteraputico = 3
    ),
    data = CONS_C1_df_dup_SEP_2020_match,
    include.miss = TRUE, # report missing values as their own category
    var.equal = TRUE,
    subset = more_one_treat == 0
  )
)
 #Possible values are: 1 - for analysis as "normal-distributed"; 2 - forces analysis as "continuous non-normal"; 3 - forces analysis as "categorical"; and 4 - NA, which performs a Shapiro-Wilks test to decide between normal or non-normal. 

# Turn the three compareGroups objects into printable tables with the overall
# p-value column.
restab1_all <- createTable(table1_all, show.p.overall = TRUE)
restab1_more_one <- createTable(table1_more_one, show.p.overall = TRUE)
restab1_only_one <- createTable(table1_only_one, show.p.overall = TRUE)

# NOTE(review): getResults() is called without `what`; confirm the default
# result is what the name `pvals1` (p-values) expects.
pvals1 <- getResults(table1_all)
#p.adjust(pvals, method = "BH")

# Export Table 1 as a scrollable HTML table.
# The grouping factor tipo_de_plan_res has levels "0" (PAI/PAB, ambulatory)
# and "1" (PR, residential), so the first data column is Ambulatory and the
# second is Residential. The headers were previously swapped, labelling the
# N=72267 ambulatory column as "Residential".
export2md(
  restab1_all,
  size = 10, first.strip = TRUE, hide.no = "no", position = "center",
  format = "html",
  caption = "Table 1. Summary descriptives at baseline, between Users with Residential and Ambulatory Treatments from 2010-2019",
  col.names = c("Variables", "Ambulatory", "Residential", "p-value")
) %>%
  kableExtra::add_footnote(c("Note. Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;", "Categorical variables are presented as number (%)"), notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 1. Summary descriptives at baseline, between Users with Residential and Ambulatory Treatments from 2010-2019
Variables Ambulatory Residential p-value
N=72267 N=12706
Starting Substance: 0.000
Alcohol 41507 (57.4%) 5080 (40.0%)
Cocaine hydrochloride 2682 (3.71%) 477 (3.75%)
Marijuana 18412 (25.5%) 4556 (35.9%)
Other 1669 (2.31%) 318 (2.50%)
Cocaine paste 2767 (3.83%) 1086 (8.55%)
‘Missing’ 5230 (7.24%) 1189 (9.36%)
Marital Status: <0.001
Married/Shared living arrangements 26185 (36.2%) 2910 (22.9%)
Separated/Divorced 7721 (10.7%) 1320 (10.4%)
Single 37343 (51.7%) 8328 (65.5%)
Widower 869 (1.20%) 133 (1.05%)
‘Missing’ 149 (0.21%) 15 (0.12%)
Educational Attainment: <0.001
3-Completed primary school or less 20062 (27.8%) 3862 (30.4%)
2-Completed high school or less 39565 (54.7%) 7044 (55.4%)
1-More than high school 12279 (17.0%) 1777 (14.0%)
‘Missing’ 361 (0.50%) 23 (0.18%)
Frequency of use of primary drug: 0.000
Did not use 1095 (1.52%) 85 (0.67%)
Less than 1 day a week 2862 (3.96%) 133 (1.05%)
2 to 3 days a week 22372 (31.0%) 1329 (10.5%)
4 to 6 days a week 12258 (17.0%) 1654 (13.0%)
1 day a week or more 5335 (7.38%) 272 (2.14%)
Daily 27938 (38.7%) 9219 (72.6%)
‘Missing’ 407 (0.56%) 14 (0.11%)
Motive of Admission to Treatment: 0.000
Spontaneous 33720 (46.7%) 4273 (33.6%)
Assisted Referral 4950 (6.85%) 3013 (23.7%)
Other 3766 (5.21%) 740 (5.82%)
Justice Sector 7159 (9.91%) 812 (6.39%)
Health Sector 22672 (31.4%) 3868 (30.4%)
Psychiatric co-morbidity: <0.001
Without psychiatric comorbidity 29070 (40.2%) 3245 (25.5%)
Diagnosis unknown (under study) 13310 (18.4%) 2771 (21.8%)
With psychiatric comorbidity 29887 (41.4%) 6690 (52.7%)
Type of Center (Public): 0.000
FALSE 14964 (20.7%) 9066 (71.4%)
TRUE 57300 (79.3%) 3623 (28.5%)
‘Missing’ 3 (0.00%) 17 (0.13%)
Sex: <0.001
Men 54806 (75.8%) 8761 (69.0%)
Women 17461 (24.2%) 3945 (31.0%)
Drug Dependence: 0.000
FALSE 22150 (30.7%) 1049 (8.26%)
TRUE 50116 (69.3%) 11657 (91.7%)
‘Missing’ 1 (0.00%) 0 (0.00%)
Age at Admission 34.5 [27.6;43.5] 32.6 [26.3;40.9] <0.001
Date of Admission to Treatment 16577 [15730;17359] 16154 [15342;17023] <0.001
Early Dropout: <0.001
FALSE 61074 (84.5%) 10201 (80.3%)
TRUE 11190 (15.5%) 2499 (19.7%)
‘Missing’ 3 (0.00%) 6 (0.05%)
No. of Treatments in the Database: .
1 58708 (81.2%) 8533 (67.2%)
2 10087 (14.0%) 2804 (22.1%)
3 2471 (3.42%) 927 (7.30%)
4 714 (0.99%) 295 (2.32%)
5 192 (0.27%) 94 (0.74%)
6 67 (0.09%) 36 (0.28%)
7 23 (0.03%) 11 (0.09%)
8 4 (0.01%) 6 (0.05%)
10 1 (0.00%) 0 (0.00%)
Drug Dependence: 0.000
FALSE 22150 (30.7%) 1049 (8.26%)
TRUE 50116 (69.3%) 11657 (91.7%)
‘Missing’ 1 (0.00%) 0 (0.00%)
Evaluation of the Therapeutic Process: <0.001
1-High Achievement 14081 (19.5%) 2831 (22.3%)
2-Medium Achievement 21728 (30.1%) 4237 (33.3%)
3-Minimum Achievement 31549 (43.7%) 5302 (41.7%)
‘Missing’ 4909 (6.79%) 336 (2.64%)
Note. Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;
Categorical variables are presented as number (%)


Of the 85,048 users, we selected 85,048 that fulfilled the conditions stated above (100%).


Additionally, we generated a correlation plot to get an overview of heterogeneous correlations between the different variables.


# library() stops with an error when polycor is missing; require() would only
# return FALSE and let the script fail later at the hetcor() call.
library(polycor)
# Definition kept from the data dictionary (translated; presumably for the
# "Evaluation of the Therapeutic Process" variable): the clinical appraisal
# made by the treating team or professional, the person in treatment and their
# family, of the level of achievement of the therapeutic objectives set at the
# start of the process and described in the personalized treatment plan. The
# criteria include the clinical and psychosocial status at discharge and a
# prognostic appraisal by the treating team.

# hetcor() computes a heterogeneous correlation matrix: Pearson correlations
# between numeric variables, polyserial correlations between numeric and
# ordinal variables, and polychoric correlations between ordinal variables.
# Identifier and treatment-count columns are excluded; the elapsed time of the
# fit is stored in tiempo_hetcor.
tiempo_antes_hetcor <- Sys.time()
hetcor_mat <- hetcor(
  CONS_C1_df_dup_SEP_2020_match[, -c("hash_key", "row", "more_one_treat", "duplicates_filtered")],
  ML = TRUE,
  std.err = TRUE,
  use = "pairwise.complete.obs",
  bins = 3,
  pd = TRUE
)
tiempo_despues_hetcor <- Sys.time()
tiempo_hetcor <- tiempo_despues_hetcor - tiempo_antes_hetcor

# Display labels for the variables of the correlation matrix, in column-position
# order (positions 1 to 16). "Physical comorbidity" is intentionally absent:
# that variable was left out of the dataset.
hetcor_var_labels <- c(
  "Starting Substance",
  "Marital Status",
  "Educational Attainment",
  "Age of Onset of Drug Use",
  "Frequency of use of primary drug",
  "Motive of Admission to Treatment",
  "Psychiatric comorbidity",
  "Chilean Region of the Center",
  "Type of Center (Public)",
  "Sex",
  "Age at Admission",
  "Date of Admission",
  "Early Drop out",
  "Residential Treatment",
  "Drug Dependence",
  "Evaluation of the Therapeutic Process"
)
hetcor_label_idx <- seq_along(hetcor_var_labels)

# Overwrite the first 16 column names (margin 2) and row names (margin 1) of
# both the correlation matrix and the matrix of test p-values.
for (hetcor_slot in c("correlations", "tests")) {
  for (hetcor_margin in c(2, 1)) {
    attr(hetcor_mat[[hetcor_slot]], "dimnames")[[hetcor_margin]][hetcor_label_idx] <-
      hetcor_var_labels
  }
}

# Cells without a test result get p = 1.
hetcor_mat$tests[is.na(hetcor_mat$tests)] <- 1

# Correlation heat map of the heterogeneous correlation matrix. Cells whose
# test p-value (p.mat) is not significant are left blank; axis tick labels are
# removed and the legend is placed at the bottom.
# Earlier note on p.mat: "replacement has 144 rows, data has 169".
# Options tried and left disabled: pch = "ns", type = "lower", and custom
# scale_x_discrete()/scale_y_discrete() labels.
ggcorrplot <-
  ggcorrplot::ggcorrplot(
    hetcor_mat$correlations,
    p.mat = hetcor_mat$tests,
    insig = "blank",
    pch = 1,
    pch.cex = 3,
    colors = c("#6D9EC1", "white", "#E46726"),
    ggtheme = ggplot2::theme_void,
    tl.srt = 45,
    tl.cex = 8,
    lab = FALSE
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    legend.position = "bottom"
  )

# Interactive 800x800 version of the correlation plot: x tick labels hidden,
# plus an (empty) annotation placed in paper coordinates.
layout(
  layout(
    ggplotly(ggcorrplot, height = 800, width = 800),
    xaxis = list(showticklabels = FALSE)
  ),
  annotations = list(
    x = .1,
    y = -0.031,
    text = "",
    showarrow = F,
    xref = 'paper',
    yref = 'paper',
    font = list(size = 11, color = "darkblue")
  )
)

Figure 2. Heterogeneous Correlation Matrix of Variables of Interest


Imputation


We generated a plot to see all the missing values in the sample.


Figure 3. Bar plot of Percentage of Missing Values per Variable at Baseline






From the figure above, we could see that the starting substance (sus_ini_mod_mvv), the age of onset of drug use (edad_ini_cons) and the evaluation of the therapeutic process (evaluacindelprocesoteraputico) had around 6% of missing data. These values should be imputed. We first focused on the age of onset of drug use. It is important to consider that the evaluation of the therapeutic process could be distorted due to censoring (many users did not finish their treatment, and did not have this evaluation in the study period).



Age at Admission

We started by looking at the missing values in the age at admission (n = 8). Since there were no cases with more than one treatment that lacked an age at admission, we did not have to take into account serial dependencies in the dates of treatment when imputing.

Figure 5. Density Estimation of Distributions of Age at Admission & Imputed Age at Admission

Figure 5. Density Estimation of Distributions of Age at Admission & Imputed Age at Admission


As seen in the Figure above, the distributions seem to differ. However, considering the low number of missing values in this variable, we proceeded with the imputation with the mean, despite the differences found. The imputed values must not be lower than the age of onset of drug use and may not be lower than 16 years old. Users younger than this age are unlikely to receive treatment intended for the adult population, so such values in this database would most probably be incorrect.


## [1] "Users that had more than one treatment with no date of admission:0"


Age of Onset of Drug Use

Another variable worth imputing is the Age of Onset of Drug Use (n= 6,549).


Figure 6. Density Estimation of Distributions of Age Of Onset of Drug Use & Imputed Ones

Figure 6. Density Estimation of Distributions of Age Of Onset of Drug Use & Imputed Ones


Based on the figure above, the age of onset of drug use was similar between the imputed and the observed values. However, we followed the rules stated in the Duplicates process (link). There were three logical conditions to fulfill in order to adequately replace these values in the database: the age of onset must not be greater than the age of onset of drug use of the primary substance at admission (1), must not be greater than the age at admission to treatment (2), and must be greater than 4 years old (3). Then, we selected the minimum value of the age of onset of drug use among the imputed values, because one user could not have more than one age of onset of drug use.


## [1] "Number of users that had more than one different age of onset of drug use before replacement: 0"

Figure 7. Bar plot of Percentage of Incorrect Imputed Values per Imputation Sample

## [1] "Cases with more than missing one age of onset: 515"
## [1] "Number of rows with values that did not fulfilled the conditions: 0"
## [1] "Number of rows with values that did not fulfilled the conditions after replacement with the minimum by users: 0"
## [1] "Number of users that had different age of onset of drug use after replacement: 0"



There were 0 cases of imputed ages of onset of drug use that did not fulfill the conditions necessary to replace the missing values with the imputed ones.


Starting Substance

Then we selected the most vulnerable value among the candidate imputations of the starting substance, in the following order of priority: cocaine paste first, then cocaine hydrochloride (snort cocaine), marijuana, alcohol, and other.


# Put side by side the starting substance proposed by each of the 30 Amelia
# imputations, keyed by the row id taken from the first imputed dataset.
# Column names reproduce the expressions they come from (e.g.
# "amelia_fit$imputations$imp1$sus_ini_mod_mvv"), because later steps refer to
# the columns by those names.
sus_ini_imp_ids <- seq_len(30)
sus_ini_imp_cols <- lapply(
  sus_ini_imp_ids,
  function(i) amelia_fit$imputations[[paste0("imp", i)]]$sus_ini_mod_mvv
)
names(sus_ini_imp_cols) <- paste0(
  "amelia_fit$imputations$imp", sus_ini_imp_ids, "$sus_ini_mod_mvv"
)
sus_ini_mod_mvv_imputed <- do.call(
  cbind.data.frame,
  c(
    list("amelia_fit$imputations$imp1$row" = amelia_fit$imputations$imp1$row),
    sus_ini_imp_cols
  )
)

# Reduce the 30 candidate starting substances per row to a single value.
# Steps: (1) flag, per imputation, which substance was proposed; (2) count the
# proposals per substance; (3) count how many distinct substances were
# proposed; (4) pick one by priority: Cocaine paste > Cocaine hydrochloride >
# Marijuana > Alcohol > Other; (5) keep only the row id and the chosen value.
sus_ini_mod_mvv_imputed <-
  sus_ini_mod_mvv_imputed %>%
  data.frame() %>%
  # (1) 0/1 indicator columns, one set per substance, over the 30 imputations.
  dplyr::mutate(
    across(
      c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),
      function(x) as.numeric(grepl("Marijuana", as.character(x))),
      .names = "mar_{col}"
    ),
    across(
      c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),
      function(x) as.numeric(grepl("Alcohol", as.character(x))),
      .names = "oh_{col}"
    ),
    across(
      c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),
      function(x) as.numeric(grepl("Cocaine paste", as.character(x))),
      .names = "pb_{col}"
    ),
    across(
      c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),
      function(x) as.numeric(grepl("Cocaine hydrochloride", as.character(x))),
      .names = "coc_{col}"
    ),
    across(
      c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),
      function(x) as.numeric(grepl("Other", as.character(x))),
      .names = "otr_{col}"
    )
  ) %>%
  # (2) Number of imputations proposing each substance. `.` is the data frame
  # entering this step, so the indicator columns must already exist here.
  dplyr::mutate(
    sus_ini_mod_mvv_mar = base::rowSums(dplyr::select(., starts_with("mar_"))),
    sus_ini_mod_mvv_oh = base::rowSums(dplyr::select(., starts_with("oh_"))),
    sus_ini_mod_mvv_pb = base::rowSums(dplyr::select(., starts_with("pb_"))),
    sus_ini_mod_mvv_coc = base::rowSums(dplyr::select(., starts_with("coc_"))),
    sus_ini_mod_mvv_otr = base::rowSums(dplyr::select(., starts_with("otr_")))
  ) %>%
  # (3) Number of distinct substances proposed for the row.
  dplyr::mutate(
    sus_ini_mod_mvv_tot = as.numeric(
      (sus_ini_mod_mvv_mar > 0) + (sus_ini_mod_mvv_oh > 0) +
        (sus_ini_mod_mvv_pb > 0) + (sus_ini_mod_mvv_coc > 0) +
        (sus_ini_mod_mvv_otr > 0)
    )
  ) %>%
  # (4) With one candidate it is taken as-is; with several, the first match in
  # the priority order wins. Rows with no candidate stay NA.
  dplyr::mutate(
    sus_ini_mod_mvv_to_imputation = dplyr::case_when(
      sus_ini_mod_mvv_tot >= 1 & sus_ini_mod_mvv_pb > 0 ~ "Cocaine paste",
      sus_ini_mod_mvv_tot >= 1 & sus_ini_mod_mvv_coc > 0 ~ "Cocaine hydrochloride",
      sus_ini_mod_mvv_tot >= 1 & sus_ini_mod_mvv_mar > 0 ~ "Marijuana",
      sus_ini_mod_mvv_tot >= 1 & sus_ini_mod_mvv_oh > 0 ~ "Alcohol",
      sus_ini_mod_mvv_tot >= 1 & sus_ini_mod_mvv_otr > 0 ~ "Other"
    )
  ) %>%
  janitor::clean_names() %>%
  # (5) Row id plus the chosen substance.
  dplyr::select(amelia_fit_imputations_imp1_row, sus_ini_mod_mvv_to_imputation)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Fill missing starting substances with the value chosen from the imputations:
# join the chosen value by row id, use it only where the observed value is NA,
# then drop the helper column.
CONS_C1_df_dup_SEP_2020_match_miss2 <-
  CONS_C1_df_dup_SEP_2020_match_miss1 %>%
  dplyr::left_join(
    sus_ini_mod_mvv_imputed,
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    sus_ini_mod_mvv = factor(
      dplyr::if_else(
        is.na(sus_ini_mod_mvv),
        as.character(sus_ini_mod_mvv_to_imputation),
        as.character(sus_ini_mod_mvv)
      )
    )
  ) %>%
  dplyr::select(-sus_ini_mod_mvv_to_imputation) %>%
  data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#


Frequency of Use of the Primary Drug at Admission

Another variable worth imputing is the frequency of use of the primary drug at admission (n= 568). In case of ties, we selected the imputed value corresponding to the most frequent drug use.


# Collect the values proposed for the frequency of use of the primary drug:
# the row identifier (taken from the first imputed dataset) followed by the
# `freq_cons_sus_prin` column of each of the 30 imputed datasets.
# Column names replicate what cbind.data.frame() derives from the literal
# expressions (e.g. "amelia_fit$imputations$imp1$freq_cons_sus_prin"),
# because later steps rely on those names.
freq_cons_sus_prin_imputed <-
  do.call(
    cbind.data.frame,
    c(
      list(`amelia_fit$imputations$imp1$row` = amelia_fit$imputations$imp1$row),
      stats::setNames(
        lapply(
          seq_len(30),
          function(i) amelia_fit$imputations[[paste0("imp", i)]]$freq_cons_sus_prin
        ),
        paste0("amelia_fit$imputations$imp", seq_len(30), "$freq_cons_sus_prin")
      )
    )
  )

# For every row, flag (1/0) which frequency category each of the 30
# imputations proposed, count the proposals per category, count how many
# distinct categories were proposed, and finally pick a single category.
freq_cons_sus_prin_imputed <-
  freq_cons_sus_prin_imputed %>%
  # data.frame() sanitises the column names ("$" becomes "."), which is the
  # form used by the column ranges below.
  data.frame() %>%
  # One block of 30 indicator columns per category, in this order.
  dplyr::mutate(
    across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),
           ~as.numeric(grepl("1 day a week or more", as.character(.))), .names = "1_day_{col}"),
    across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),
           ~as.numeric(grepl("2 to 3 days a week", as.character(.))), .names = "2_3_{col}"),
    across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),
           ~as.numeric(grepl("4 to 6 days a week", as.character(.))), .names = "4_6_{col}"),
    across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),
           ~as.numeric(grepl("Less than 1 day a week", as.character(.))), .names = "less_1_{col}"),
    across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),
           ~as.numeric(grepl("Did not use", as.character(.))), .names = "did_not_{col}"),
    across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),
           ~as.numeric(grepl("Daily", as.character(.))), .names = "daily_{col}")
  ) %>%
  # Number of imputations (out of 30) that proposed each category.
  dplyr::mutate(
    freq_cons_sus_prin_daily = base::rowSums(dplyr::select(., starts_with("daily_"))),
    freq_cons_sus_prin_4_6 = base::rowSums(dplyr::select(., starts_with("4_6_"))),
    freq_cons_sus_prin_2_3 = base::rowSums(dplyr::select(., starts_with("2_3_"))),
    freq_cons_sus_prin_1_day = base::rowSums(dplyr::select(., starts_with("1_day_"))),
    freq_cons_sus_prin_less_1 = base::rowSums(dplyr::select(., starts_with("less_1_"))),
    freq_cons_sus_prin_did_not = base::rowSums(dplyr::select(., starts_with("did_not_")))
  ) %>%
  # Number of distinct categories proposed at least once for the row.
  dplyr::mutate(
    freq_cons_sus_prin_tot = base::rowSums(
      cbind(
        freq_cons_sus_prin_1_day,
        freq_cons_sus_prin_2_3,
        freq_cons_sus_prin_4_6,
        freq_cons_sus_prin_less_1,
        freq_cons_sus_prin_did_not,
        freq_cons_sus_prin_daily
      ) > 0
    )
  ) %>%
  # Hierarchy: the first category, from most to least frequent use, that was
  # proposed at least once is chosen. With a single proposed category this is
  # that category; with several, the one with the most frequent use wins.
  # Rows in which no category was proposed stay NA.
  dplyr::mutate(
    freq_cons_sus_prin_to_imputation = dplyr::case_when(
      freq_cons_sus_prin_daily > 0 ~ "Daily",
      freq_cons_sus_prin_4_6 > 0 ~ "4 to 6 days a week",
      freq_cons_sus_prin_2_3 > 0 ~ "2 to 3 days a week",
      freq_cons_sus_prin_1_day > 0 ~ "1 day a week or more",
      freq_cons_sus_prin_less_1 > 0 ~ "Less than 1 day a week",
      freq_cons_sus_prin_did_not > 0 ~ "Did not use"
    )
  ) %>%
  janitor::clean_names()

# Reduce the table to the row identifier and the category chosen for
# imputation; the indicator and count columns are no longer needed.
freq_cons_sus_prin_imputed <-
  freq_cons_sus_prin_imputed %>%
  dplyr::select(
    amelia_fit_imputations_imp1_row,
    freq_cons_sus_prin_to_imputation
  )

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

# Attach the selected imputation (matched on `row`) and use it only where
# `freq_cons_sus_prin` is missing; observed values are kept unchanged.
# Note: the helper column `freq_cons_sus_prin_to_imputation` remains in the
# resulting table.
CONS_C1_df_dup_SEP_2020_match_miss3 <-
  CONS_C1_df_dup_SEP_2020_match_miss2 %>%
  dplyr::left_join(
    freq_cons_sus_prin_imputed,
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    # First non-missing of (observed value, imputed value), re-encoded as factor.
    freq_cons_sus_prin = factor(
      dplyr::coalesce(
        as.character(freq_cons_sus_prin),
        as.character(freq_cons_sus_prin_to_imputation)
      )
    )
  ) %>%
  data.table()


Educational Attainment

Another variable worth imputing is Educational Attainment (n= 437). We followed the rules stated in the Duplicates4 process (link). We were particularly careful to impute attainments that follow a progression from primary school to more than high school. For this purpose, we first looked at the observed values per user, filling intermediate gaps in educational attainment for users with intermediate null values (a). We then dealt with the difficulty of incorrect imputations by logically selecting among the imputed values, if there were any.


# Collect the values proposed for educational attainment: the row
# identifier, user key (`hash_key`) and `fech_ing_num` from the first imputed
# dataset, followed by the `escolaridad_rec` column of each of the 30 imputed
# datasets.
# Column names replicate what cbind.data.frame() derives from the literal
# expressions (e.g. "amelia_fit$imputations$imp1$escolaridad_rec"), because
# later steps rely on those names.
escolaridad_rec_imputed <-
  do.call(
    cbind.data.frame,
    c(
      list(
        `amelia_fit$imputations$imp1$row` = amelia_fit$imputations$imp1$row,
        `amelia_fit$imputations$imp1$hash_key` = amelia_fit$imputations$imp1$hash_key,
        `amelia_fit$imputations$imp1$fech_ing_num` = amelia_fit$imputations$imp1$fech_ing_num
      ),
      stats::setNames(
        lapply(
          seq_len(30),
          function(i) amelia_fit$imputations[[paste0("imp", i)]]$escolaridad_rec
        ),
        paste0("amelia_fit$imputations$imp", seq_len(30), "$escolaridad_rec")
      )
    )
  )

# For every row, flag (1/0) which educational level each of the 30
# imputations proposed and count the proposals per level.
escolaridad_rec_imputed2 <-
  escolaridad_rec_imputed %>%
  # data.frame() sanitises the column names ("$" becomes "."), which is the
  # form used by the column ranges below.
  data.frame() %>%
  # One block of 30 indicator columns per educational level, in this order.
  dplyr::mutate(
    across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),
           ~as.numeric(grepl("3-Completed primary school or less", as.character(.))), .names = "3_primary_{col}"),
    across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),
           ~as.numeric(grepl("2-Completed high school or less", as.character(.))), .names = "2_high_{col}"),
    across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),
           ~as.numeric(grepl("1-More than high school", as.character(.))), .names = "1_more_high_{col}")
  ) %>%
  # Number of imputations (out of 30) that proposed each level.
  dplyr::mutate(
    escolaridad_rec_3_primary = base::rowSums(dplyr::select(., contains("3_primary_"))),
    escolaridad_rec_2_high = base::rowSums(dplyr::select(., contains("2_high_"))),
    escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., contains("1_more_high_")))
  )

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create an ordered index of the number of treatments by user
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#:#:#:#;#;#;
# Within-user running index of records (1, 2, ... per `hash_key`), computed
# on the original database (without imputations) in its current row order.
# Only the index column `rn` is returned, aligned row by row with the source.
CONS_C1_df_dup_SEP_2020_match_rn <-
  CONS_C1_df_dup_SEP_2020_match_miss %>%
  dplyr::group_by(hash_key) %>%
  dplyr::mutate(rn = dplyr::row_number()) %>%
  dplyr::ungroup() %>%
  dplyr::select(rn)
#:#:#:#;#;#;
# Add, to the per-row imputation counts, the originally recorded educational
# attainment and the within-user record index, then derive per-user helper
# variables (next / second-next recorded value, number of records, NA counts).
escolaridad_rec_imputed3 <-
  escolaridad_rec_imputed2 %>%
  # Lookup table built column-wise from the original data; cbind.data.frame()
  # names its columns after the expressions used, hence the "$" names below.
  dplyr::left_join(
    cbind.data.frame(
      CONS_C1_df_dup_SEP_2020_match_miss$row,
      CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec,
      CONS_C1_df_dup_SEP_2020_match_rn$rn
    ),
    by = c("amelia_fit.imputations.imp1.row" = "CONS_C1_df_dup_SEP_2020_match_miss$row")
  ) %>%
  dplyr::rename(
    escolaridad_rec_original = `CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec`
  ) %>%
  # Keep only the leading digit of the label as a numeric level code.
  dplyr::mutate(
    escolaridad_rec_original = as.numeric(substr(escolaridad_rec_original, 1, 1))
  ) %>%
  # Sort each user's records by their within-user index.
  dplyr::arrange(
    amelia_fit.imputations.imp1.hash_key,
    `CONS_C1_df_dup_SEP_2020_match_rn$rn`
  ) %>%
  dplyr::group_by(amelia_fit.imputations.imp1.hash_key) %>%
  dplyr::mutate(
    # Recorded level in the user's next and second-next record.
    siguiente_escolaridad_rec_original = dplyr::lead(escolaridad_rec_original),
    subsig_escolaridad_rec_original = dplyr::lead(escolaridad_rec_original, n = 2),
    # Highest record index of the user.
    rn = max(`CONS_C1_df_dup_SEP_2020_match_rn$rn`),
    # Missingness flag per record, its per-user total, and whether the user
    # has any missing value (max of the flag).
    n_na_esc_or = is.na(escolaridad_rec_original),
    sum_n_na_esc_or = sum(n_na_esc_or, na.rm = TRUE),
    max_sum_n_na_esc_or = max(n_na_esc_or, na.rm = TRUE)
  ) %>%
  #dplyr::select(amelia_fit.imputations.imp1.hash_key,amelia_fit.imputations.imp30.rn,
  #              siguiente_escolaridad_rec_original,escolaridad_rec_original,amelia_fit.imputations.imp1.fech_ing_num)%>% View()
  dplyr::ungroup()

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#PREPARACIÓN  BASE DE DATOS PARA IMPUTACION Y CREACIÓN DE VARIABLES PARA CONDICIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# Reshape the education data to one row per user (one set of columns per
# treatment number, 1 to 10) and apply the logical imputation rules a0-a3 to
# the originally recorded attainment before any model-based imputation.
# Level codes are the leading digit of the original labels:
# 1 = more than high school, 2 = completed high school or less,
# 3 = completed primary school or less.
# The code assumes at most 10 treatments per user (columns *_1 ... *_10).
escolaridad_rec_imputed4 <-
  escolaridad_rec_imputed3 %>%
  dplyr::select(
    amelia_fit.imputations.imp1.hash_key,
    `CONS_C1_df_dup_SEP_2020_match_rn$rn`,
    escolaridad_rec_original,
    escolaridad_rec_3_primary,
    escolaridad_rec_2_high,
    escolaridad_rec_1_more_high
  ) %>%
  dplyr::rename("hash_key" = "amelia_fit.imputations.imp1.hash_key") %>%
  dplyr::rename("treat_no_for_usr" = "CONS_C1_df_dup_SEP_2020_match_rn$rn") %>%
  # Number of treatments of each user (highest within-user index).
  dplyr::group_by(hash_key) %>%
  dplyr::mutate(treat_per_usr = max(treat_no_for_usr, na.rm = TRUE)) %>%
  dplyr::ungroup() %>%
  tidyr::pivot_wider(
    names_from = treat_no_for_usr,
    #names_glue = "ord_treat_esc_{.value}",
    values_from = c(
      escolaridad_rec_original,
      escolaridad_rec_3_primary,
      escolaridad_rec_2_high,
      escolaridad_rec_1_more_high
    ),
    values_fill = NA
  ) %>%
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  # Flag inconsistencies in attainment: a later treatment with a higher code
  # (i.e. lower attainment) than an earlier one, looking back up to 3
  # treatments. Comparisons involving NA never raise the flag on their own.
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_tot_cond = dplyr::case_when(
    (escolaridad_rec_original_10 > escolaridad_rec_original_9) | (escolaridad_rec_original_10 > escolaridad_rec_original_8) | (escolaridad_rec_original_10 > escolaridad_rec_original_7) |
      (escolaridad_rec_original_9 > escolaridad_rec_original_8) | (escolaridad_rec_original_9 > escolaridad_rec_original_7) | (escolaridad_rec_original_9 > escolaridad_rec_original_6) |
      (escolaridad_rec_original_8 > escolaridad_rec_original_7) | (escolaridad_rec_original_8 > escolaridad_rec_original_6) | (escolaridad_rec_original_8 > escolaridad_rec_original_5) |
      (escolaridad_rec_original_7 > escolaridad_rec_original_6) | (escolaridad_rec_original_7 > escolaridad_rec_original_5) | (escolaridad_rec_original_7 > escolaridad_rec_original_4) |
      (escolaridad_rec_original_6 > escolaridad_rec_original_5) | (escolaridad_rec_original_6 > escolaridad_rec_original_4) | (escolaridad_rec_original_6 > escolaridad_rec_original_3) |
      (escolaridad_rec_original_5 > escolaridad_rec_original_4) | (escolaridad_rec_original_5 > escolaridad_rec_original_3) | (escolaridad_rec_original_5 > escolaridad_rec_original_2) |
      (escolaridad_rec_original_4 > escolaridad_rec_original_3) | (escolaridad_rec_original_4 > escolaridad_rec_original_2) | (escolaridad_rec_original_4 > escolaridad_rec_original_1) |
      (escolaridad_rec_original_3 > escolaridad_rec_original_2) | (escolaridad_rec_original_3 > escolaridad_rec_original_1) |
      (escolaridad_rec_original_2 > escolaridad_rec_original_1) ~ 1,
    TRUE ~ 0
  )) %>%
  #dplyr::filter(escolaridad_rec_tot_cond==1) %>% #View() #0 rows; what about 374745c85601976177fe614a7370e475?
  #dplyr::filter(treat_per_usr>1) %>%
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  # Flag users with missing attainment within their own treatments.
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(sum_nas_esc = base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_"))))) %>%
  # A user with k treatments structurally has 10 - k empty columns; any NA
  # beyond that number is a genuinely missing value. (Fix: the branch for
  # users with 10 treatments previously required more than 10 NAs, which is
  # impossible with 10 columns, so those users were never flagged.)
  dplyr::mutate(escolaridad_rec_tot_nas_en_medio = dplyr::case_when(
    sum_nas_esc > (10 - treat_per_usr) ~ 1,
    TRUE ~ 0
  )) %>% #18b1f9646a2cd6bebd962637cff0a21a 5 cases
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  # Attainment recorded at the user's last treatment.
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(last_esc = dplyr::case_when(
    treat_per_usr == 10 ~ escolaridad_rec_original_10,
    treat_per_usr == 9 ~ escolaridad_rec_original_9,
    treat_per_usr == 8 ~ escolaridad_rec_original_8,
    treat_per_usr == 7 ~ escolaridad_rec_original_7,
    treat_per_usr == 6 ~ escolaridad_rec_original_6,
    treat_per_usr == 5 ~ escolaridad_rec_original_5,
    treat_per_usr == 4 ~ escolaridad_rec_original_4,
    treat_per_usr == 3 ~ escolaridad_rec_original_3,
    treat_per_usr == 2 ~ escolaridad_rec_original_2,
    treat_per_usr == 1 ~ escolaridad_rec_original_1
  )) %>%
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  # a0) If the first and last recorded values are equal, set every treatment
  #     in between to that same value.
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_9 = dplyr::case_when(
    escolaridad_rec_original_1 == last_esc & treat_per_usr > 9 & !is.na(escolaridad_rec_original_1) ~ escolaridad_rec_original_1,
    TRUE ~ escolaridad_rec_original_9
  )) %>%
  dplyr::mutate(escolaridad_rec_original_8 = dplyr::case_when(
    escolaridad_rec_original_1 == last_esc & treat_per_usr > 8 & !is.na(escolaridad_rec_original_1) ~ escolaridad_rec_original_1,
    TRUE ~ escolaridad_rec_original_8
  )) %>%
  dplyr::mutate(escolaridad_rec_original_7 = dplyr::case_when(
    escolaridad_rec_original_1 == last_esc & treat_per_usr > 7 & !is.na(escolaridad_rec_original_1) ~ escolaridad_rec_original_1,
    TRUE ~ escolaridad_rec_original_7
  )) %>%
  dplyr::mutate(escolaridad_rec_original_6 = dplyr::case_when(
    escolaridad_rec_original_1 == last_esc & treat_per_usr > 6 & !is.na(escolaridad_rec_original_1) ~ escolaridad_rec_original_1,
    TRUE ~ escolaridad_rec_original_6
  )) %>%
  dplyr::mutate(escolaridad_rec_original_5 = dplyr::case_when(
    escolaridad_rec_original_1 == last_esc & treat_per_usr > 5 & !is.na(escolaridad_rec_original_1) ~ escolaridad_rec_original_1,
    TRUE ~ escolaridad_rec_original_5
  )) %>%
  dplyr::mutate(escolaridad_rec_original_4 = dplyr::case_when(
    escolaridad_rec_original_1 == last_esc & treat_per_usr > 4 & !is.na(escolaridad_rec_original_1) ~ escolaridad_rec_original_1,
    TRUE ~ escolaridad_rec_original_4
  )) %>%
  dplyr::mutate(escolaridad_rec_original_3 = dplyr::case_when(
    escolaridad_rec_original_1 == last_esc & treat_per_usr > 3 & !is.na(escolaridad_rec_original_1) ~ escolaridad_rec_original_1,
    TRUE ~ escolaridad_rec_original_3
  )) %>%
  dplyr::mutate(escolaridad_rec_original_2 = dplyr::case_when(
    escolaridad_rec_original_1 == last_esc & treat_per_usr > 2 & !is.na(escolaridad_rec_original_1) ~ escolaridad_rec_original_1,
    TRUE ~ escolaridad_rec_original_2
  )) %>%
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  # a1) Fill a single missing value lying between two equal neighbours with
  #     the neighbours' value. The steps run from treatment 9 down to 2 and
  #     each uses the result of the previous one, so the order matters.
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_9 = dplyr::case_when(
    escolaridad_rec_original_8 == escolaridad_rec_original_10 & is.na(escolaridad_rec_original_9) & !is.na(escolaridad_rec_original_10) ~ escolaridad_rec_original_10,
    TRUE ~ escolaridad_rec_original_9
  )) %>%
  # dplyr::filter(!is.na(escolaridad_rec_original_10)) %>% View()
  dplyr::mutate(escolaridad_rec_original_8 = dplyr::case_when(
    escolaridad_rec_original_7 == escolaridad_rec_original_9 & is.na(escolaridad_rec_original_8) & !is.na(escolaridad_rec_original_9) ~ escolaridad_rec_original_9,
    TRUE ~ escolaridad_rec_original_8
  )) %>%
  # dplyr::filter(!is.na(escolaridad_rec_original_9)) %>% View()
  dplyr::mutate(escolaridad_rec_original_7 = dplyr::case_when(
    escolaridad_rec_original_6 == escolaridad_rec_original_8 & is.na(escolaridad_rec_original_7) & !is.na(escolaridad_rec_original_8) ~ escolaridad_rec_original_8,
    TRUE ~ escolaridad_rec_original_7
  )) %>%
  # dplyr::filter(!is.na(escolaridad_rec_original_8)) %>% View()
  dplyr::mutate(escolaridad_rec_original_6 = dplyr::case_when(
    escolaridad_rec_original_5 == escolaridad_rec_original_7 & is.na(escolaridad_rec_original_6) & !is.na(escolaridad_rec_original_7) ~ escolaridad_rec_original_7,
    TRUE ~ escolaridad_rec_original_6
  )) %>%
  # dplyr::filter(!is.na(escolaridad_rec_original_7)) %>% View()
  dplyr::mutate(escolaridad_rec_original_5 = dplyr::case_when(
    escolaridad_rec_original_4 == escolaridad_rec_original_6 & is.na(escolaridad_rec_original_5) & !is.na(escolaridad_rec_original_6) ~ escolaridad_rec_original_6,
    TRUE ~ escolaridad_rec_original_5
  )) %>%
  # dplyr::filter(!is.na(escolaridad_rec_original_6)) %>% View()
  dplyr::mutate(escolaridad_rec_original_4 = dplyr::case_when(
    escolaridad_rec_original_3 == escolaridad_rec_original_5 & is.na(escolaridad_rec_original_4) & !is.na(escolaridad_rec_original_5) ~ escolaridad_rec_original_5,
    TRUE ~ escolaridad_rec_original_4
  )) %>%
  # dplyr::filter(!is.na(escolaridad_rec_original_5)) %>% View()
  dplyr::mutate(escolaridad_rec_original_3 = dplyr::case_when(
    escolaridad_rec_original_2 == escolaridad_rec_original_4 & is.na(escolaridad_rec_original_3) & !is.na(escolaridad_rec_original_4) ~ escolaridad_rec_original_4,
    TRUE ~ escolaridad_rec_original_3
  )) %>%
  # dplyr::filter(!is.na(escolaridad_rec_original_4)) %>% View()
  dplyr::mutate(escolaridad_rec_original_2 = dplyr::case_when(
    escolaridad_rec_original_1 == escolaridad_rec_original_3 & is.na(escolaridad_rec_original_2) & !is.na(escolaridad_rec_original_3) ~ escolaridad_rec_original_3,
    TRUE ~ escolaridad_rec_original_2
  )) %>%
  # dplyr::filter(!is.na(escolaridad_rec_original_3)) %>% View()
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  # a2) If the second treatment has a value but the first does not, and that
  #     value is not the intermediate one (high school, which would leave
  #     more than one possible value for the first treatment), copy it to
  #     the first treatment.
  #     (Fix: the rule is now restricted to a missing first value; before,
  #     an observed first value was overwritten as well.)
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_1 = dplyr::case_when(
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2 == 3 ~ 3,
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2 == 1 ~ 1,
    TRUE ~ escolaridad_rec_original_1
  )) %>%
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  # a3) If the user has more than 2 treatments, the first has no value, and
  #     the second holds the intermediate value with a third treatment
  #     holding the same value, impute the first treatment.
  #     (Fix: restricted to a missing first value, as in a2.)
  #     NOTE(review): the imputed value is 3 (primary), not the shared value
  #     2 of the second and third treatments - confirm this is intended.
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_1 = dplyr::case_when(
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2 == 2 & escolaridad_rec_original_3 == 2 ~ 3,
    TRUE ~ escolaridad_rec_original_1
  )) %>%
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  # Measures to capture inconsistencies across all treatments of each user:
  # per-treatment indicators of each level and, per user, the number of
  # treatments recorded at each level.
  #escolaridad_rec_imputed4 %>% #escolaridad_rec_tot_cond
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10), ~dplyr::case_when(. == 1 ~ 1, TRUE ~ 0), .names = "1_more_high_{col}")) %>%
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10), ~dplyr::case_when(. == 2 ~ 1, TRUE ~ 0), .names = "2_high_{col}")) %>%
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10), ~dplyr::case_when(. == 3 ~ 1, TRUE ~ 0), .names = "3_primary_{col}")) %>%
  dplyr::mutate(suma_vals_escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., starts_with("1_more_high_")))) %>%
  dplyr::mutate(suma_vals_escolaridad_rec_2_high = base::rowSums(dplyr::select(., starts_with("2_high_")))) %>%
  dplyr::mutate(suma_vals_escolaridad_rec_3_primary = base::rowSums(dplyr::select(., starts_with("3_primary_"))))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#IMPUTACIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed5<-
escolaridad_rec_imputed4 %>% 
  #hacer una suma de más NA's de los que debería tener según la cantidad de tratamientos que tiene la persona
  #:#:#:#:#:#:#:#:#:
  dplyr::mutate(sum_nas_esc_post=base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_")))))%>%
  dplyr::mutate(escolaridad_rec_tot_nas_en_medio_post=dplyr::case_when(
      (sum_nas_esc_post>10 & treat_per_usr==10)|
      (sum_nas_esc_post>1 & treat_per_usr==9)|
      (sum_nas_esc_post>2 & treat_per_usr==8)|
      (sum_nas_esc_post>3 & treat_per_usr==7)|
      (sum_nas_esc_post>4 & treat_per_usr==6)|
      (sum_nas_esc_post>5 & treat_per_usr==5)|
      (sum_nas_esc_post>6 & treat_per_usr==4)|
      (sum_nas_esc_post>7 & treat_per_usr==3)|
      (sum_nas_esc_post>8 & treat_per_usr==2)|
      (sum_nas_esc_post>9 & treat_per_usr==1)~1,TRUE~0)) %>%
  #dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
  #d864967fa0b1c5bb1d4eb5f6a7c8c2c1
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b0))valor inicial y sólo un tratamiento, se imputa por el valor imputado más frecuente de las 30 bases de datos
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)& 
      (escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)~3,
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)& 
      (escolaridad_rec_2_high_1>escolaridad_rec_1_more_high_1)~2,
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_1_more_high_1>escolaridad_rec_3_primary_1)& 
      (escolaridad_rec_1_more_high_1>escolaridad_rec_2_high_1)~1,
    TRUE~escolaridad_rec_original_1)) %>% 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b1))valor en el segundo tratamiento es intermedio, inicial se imputa, dependiendo si primaria es mayor que intermedio o no
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  ###
  #dplyr::filter(is.na(escolaridad_rec_original_1),!is.na(escolaridad_rec_original_2)) %>%
  #dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1,escolaridad_rec_1_more_high_1) %>% View()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#

  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)~3,
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1<escolaridad_rec_2_high_1)~2,TRUE~escolaridad_rec_original_1))%>%
    #dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
#610dd4dba4dbb62848691b6916828948
  #90d581cd11064c41b82f8e4d6ff7b70b
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacío, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_  
  dplyr::mutate(escolaridad_rec_original_10= dplyr::case_when(
  #
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==1~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==2 & 
      (escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10)~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==2 & 
      (escolaridad_rec_1_more_high_10<escolaridad_rec_2_high_10)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
      (escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10) & (escolaridad_rec_1_more_high_10>escolaridad_rec_3_primary_10)~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
        (escolaridad_rec_2_high_10 >escolaridad_rec_1_more_high_10) & (escolaridad_rec_2_high_10>escolaridad_rec_3_primary_10)~2,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
      (escolaridad_rec_3_primary_10 >escolaridad_rec_2_high_10) & (escolaridad_rec_3_primary_10>escolaridad_rec_1_more_high_10)~2,TRUE~escolaridad_rec_original_10)) %>% 
 # dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
  #
    dplyr::mutate(escolaridad_rec_original_9= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==1~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==2 & 
      (escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9)~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==2 & 
      (escolaridad_rec_1_more_high_9<escolaridad_rec_2_high_9)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
      (escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9) & (escolaridad_rec_1_more_high_9>escolaridad_rec_3_primary_9)~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
        (escolaridad_rec_2_high_9 >escolaridad_rec_1_more_high_9) & (escolaridad_rec_2_high_9>escolaridad_rec_3_primary_9)~2,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
      (escolaridad_rec_3_primary_9 >escolaridad_rec_2_high_9) & (escolaridad_rec_3_primary_9>escolaridad_rec_1_more_high_9)~2,TRUE~escolaridad_rec_original_9)) %>% 
  #
        dplyr::mutate(escolaridad_rec_original_8= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==1~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==2 & 
      (escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8)~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==2 & 
      (escolaridad_rec_1_more_high_8<escolaridad_rec_2_high_8)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
      (escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8) & (escolaridad_rec_1_more_high_8>escolaridad_rec_3_primary_8)~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
        (escolaridad_rec_2_high_8 >escolaridad_rec_1_more_high_8) & (escolaridad_rec_2_high_8>escolaridad_rec_3_primary_8)~2,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
      (escolaridad_rec_3_primary_8 >escolaridad_rec_2_high_8) & (escolaridad_rec_3_primary_8>escolaridad_rec_1_more_high_8)~2,TRUE~escolaridad_rec_original_8)) %>% 
  #
        dplyr::mutate(escolaridad_rec_original_7= dplyr::case_when(
          #si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==1~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==2 & 
      (escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7)~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==2 & 
      (escolaridad_rec_1_more_high_7<escolaridad_rec_2_high_7)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
      (escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7) & (escolaridad_rec_1_more_high_7>escolaridad_rec_3_primary_7)~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
        (escolaridad_rec_2_high_7 >escolaridad_rec_1_more_high_7) & (escolaridad_rec_2_high_7>escolaridad_rec_3_primary_7)~2,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
      (escolaridad_rec_3_primary_7 >escolaridad_rec_2_high_7) & (escolaridad_rec_3_primary_7>escolaridad_rec_1_more_high_7)~2,TRUE~escolaridad_rec_original_7)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_6= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==1~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==2 & 
      (escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6)~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==2 & 
      (escolaridad_rec_1_more_high_6<escolaridad_rec_2_high_6)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
      (escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6) & (escolaridad_rec_1_more_high_6>escolaridad_rec_3_primary_6)~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
        (escolaridad_rec_2_high_6 >escolaridad_rec_1_more_high_6) & (escolaridad_rec_2_high_6>escolaridad_rec_3_primary_6)~2,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
      (escolaridad_rec_3_primary_6 >escolaridad_rec_2_high_6) & (escolaridad_rec_3_primary_6>escolaridad_rec_1_more_high_6)~2,TRUE~escolaridad_rec_original_6)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_5= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==1~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==2 & 
      (escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5)~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==2 & 
      (escolaridad_rec_1_more_high_5<escolaridad_rec_2_high_5)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
      (escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5) & (escolaridad_rec_1_more_high_5>escolaridad_rec_3_primary_5)~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
        (escolaridad_rec_2_high_5 >escolaridad_rec_1_more_high_5) & (escolaridad_rec_2_high_5>escolaridad_rec_3_primary_5)~2,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
      (escolaridad_rec_3_primary_5 >escolaridad_rec_2_high_5) & (escolaridad_rec_3_primary_5>escolaridad_rec_1_more_high_5)~2,TRUE~escolaridad_rec_original_5)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_4= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==1~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4)~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_4<escolaridad_rec_2_high_4)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
      (escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4) & (escolaridad_rec_1_more_high_4>escolaridad_rec_3_primary_4)~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
        (escolaridad_rec_2_high_4 >escolaridad_rec_1_more_high_4) & (escolaridad_rec_2_high_4>escolaridad_rec_3_primary_4)~2,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
      (escolaridad_rec_3_primary_4 >escolaridad_rec_2_high_4) & (escolaridad_rec_3_primary_4>escolaridad_rec_1_more_high_4)~2,TRUE~escolaridad_rec_original_4)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_3= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==1~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3)~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_3<escolaridad_rec_2_high_3)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
      (escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3) & (escolaridad_rec_1_more_high_3>escolaridad_rec_3_primary_3)~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
        (escolaridad_rec_2_high_3 >escolaridad_rec_1_more_high_3) & (escolaridad_rec_2_high_3>escolaridad_rec_3_primary_3)~2,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
      (escolaridad_rec_3_primary_3 >escolaridad_rec_2_high_3) & (escolaridad_rec_3_primary_3>escolaridad_rec_1_more_high_3)~2,TRUE~escolaridad_rec_original_3))
#:#:#:#:
 # dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
 #:#:#:#:
  #comprobar si en verdad calza:
  #%>%dplyr::filter(hash_key=="ef4325cda7ddd92f6218bb910c3e0895") %>% dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,treat_per_usr,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1)
  #610dd4dba4dbb62848691b6916828948
  #90d581cd11064c41b82f8e4d6ff7b70b
#escolaridad_rec_imputed5 %>% 
#    dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)%>%dplyr::filter(hash_key=="98d6644d995ea2c8777a683160728004") %>% dplyr::select(escolaridad_rec_original_3,escolaridad_rec_original_4,escolaridad_rec_original_4,treat_per_usr,escolaridad_rec_3_primary_4,escolaridad_rec_2_high_4,escolaridad_rec_1_more_high_4)

#98d6644d995ea2c8777a683160728004
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacío, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_  
# Reshape the wide `escolaridad_rec_original_<k>` columns into a long lookup
# table with one row per non-missing value, keyed by "<hash_key>_<k>".
escolaridad_rec_imputed6 <-
  escolaridad_rec_imputed5 %>%
  dplyr::select(hash_key, starts_with("escolaridad_rec_original_")) %>%
  tidyr::pivot_longer(
    cols = starts_with("escolaridad_rec_original_"),
    names_to = "rn",
    names_prefix = "escolaridad_rec_original_",
    # Missing values are dropped while reshaping.
    values_drop_na = TRUE
  ) %>%
  dplyr::mutate(hash_rn = paste0(hash_key, "_", rn)) %>%
  dplyr::select(hash_rn, value)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Attach the education codes held in `escolaridad_rec_imputed6` to each row,
# matching on "<hash_key>_<row number within hash_key>", and rebuild
# `escolaridad_rec` from the joined code.
CONS_C1_df_dup_SEP_2020_match_miss4 <-
  CONS_C1_df_dup_SEP_2020_match_miss3 %>%
  # Number the rows of each user in their current order.
  dplyr::group_by(hash_key) %>%
  dplyr::mutate(rn = row_number()) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(hash_rn = paste0(hash_key, "_", rn)) %>%
  dplyr::left_join(escolaridad_rec_imputed6, by = c("hash_rn")) %>%
  # Codes 1/2/3 become labels; rows with no joined value end up as NA
  # (the previous content of `escolaridad_rec` is not kept).
  dplyr::mutate(
    escolaridad_rec = dplyr::case_when(
      value == 1 ~ "1-More than high school",
      value == 2 ~ "2-Completed high school or less",
      value == 3 ~ "3-Completed primary school or less"
    )
  ) %>%
  dplyr::arrange(hash_key, rn) %>%
  dplyr::mutate(
    escolaridad_rec = parse_factor(
      as.character(escolaridad_rec),
      levels = c(
        '3-Completed primary school or less',
        '2-Completed high school or less',
        '1-More than high school'
      ),
      ordered = FALSE,
      trim_ws = TRUE,
      include_na = FALSE,
      locale = locale(encoding = "Latin1")
    )
  ) %>%
  # Drop the helper columns used only for the join.
  dplyr::select(-value, -hash_rn) %>%
  data.table()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Count the rows whose education code is larger than the code on the same
# user's previous row. Code 1 is the highest attainment and 3 the lowest, so a
# larger code after a smaller one means the attainment went backwards.
paste(
  "Check inconsistencies with posterior educational attainments (0= No inconsistencies):",
  CONS_C1_df_dup_SEP_2020_match_miss4 %>%
    dplyr::arrange(hash_key, rn) %>%
    dplyr::group_by(hash_key) %>%
    dplyr::mutate(
      # The leading character of the label is the numeric code.
      escolaridad_rec_num = as.numeric(substr(escolaridad_rec, 1, 1)),
      sig_escolaridad_rec_num = lead(escolaridad_rec_num),
      ant_escolaridad_rec_num = lag(escolaridad_rec_num)
    ) %>%
    dplyr::ungroup() %>%
    dplyr::filter(escolaridad_rec_num > ant_escolaridad_rec_num) %>%
    dplyr::select(
      hash_key, rn, fech_ing_num, escolaridad_rec, escolaridad_rec_num,
      sig_escolaridad_rec_num, ant_escolaridad_rec_num
    ) %>%
    nrow()
)
## [1] "Check inconsistencies with posterior educational attainments (0= No inconsistencies): 0"


We ended up with 241 missing values in educational attainment (users=238) because the imputed values did not satisfy the requirement that educational attainment can only progress over time (e.g., a user cannot report having completed secondary school and later report having completed only primary school); this happened, for example, when the imputed values were tied or when no imputed value was available.


Marital status

Additionally, we replaced missing values of marital status (n=198). Since no marital status category was considered particularly more vulnerable than the others, we selected the most frequent imputed value among the different imputed databases.


# See the different values proposed for marital status.
# Puts the row identifier of the first imputation next to the
# `estado_conyugal_2` column of each of the 30 imputations. The columns are
# named with the same "amelia_fit$imputations$imp<k>$..." strings that a
# spelled-out cbind.data.frame() call produces, because later steps select
# columns by those names.
estado_conyugal_2_imputed <- local({
  imp_ids <- paste0("imp", seq_len(30))
  columnas <- c(
    list(amelia_fit$imputations$imp1$row),
    lapply(imp_ids, function(id) amelia_fit$imputations[[id]]$estado_conyugal_2)
  )
  names(columnas) <- c(
    "amelia_fit$imputations$imp1$row",
    paste0("amelia_fit$imputations$", imp_ids, "$estado_conyugal_2")
  )
  do.call(cbind.data.frame, columnas)
})

# For every imputation column, flag (1/0) whether it proposes each marital
# status, count how many imputations support each status, and count how many
# different statuses were proposed at least once (`estado_conyugal_2_tot`).
estado_conyugal_2_imputed <-
  estado_conyugal_2_imputed %>%
  # data.frame() makes the column names syntactic ("$" becomes ".").
  data.frame() %>%
  dplyr::mutate(across(
    c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),
    ~ as.numeric(grepl("Married/Shared living arrangements", as.character(.))),
    .names = "married_{col}"
  )) %>%
  dplyr::mutate(across(
    c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),
    ~ as.numeric(grepl("Separated/Divorced", as.character(.))),
    .names = "sep_div_{col}"
  )) %>%
  dplyr::mutate(across(
    c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),
    ~ as.numeric(grepl("Single", as.character(.))),
    .names = "singl_{col}"
  )) %>%
  dplyr::mutate(across(
    c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),
    ~ as.numeric(grepl("Widower", as.character(.))),
    .names = "widow_{col}"
  )) %>%
  # Number of imputations supporting each status (`.` is the piped-in table,
  # which already holds every indicator column).
  dplyr::mutate(
    estado_conyugal_2_married = base::rowSums(dplyr::select(., starts_with("married_"))),
    estado_conyugal_2_sep_div = base::rowSums(dplyr::select(., starts_with("sep_div_"))),
    estado_conyugal_2_singl = base::rowSums(dplyr::select(., starts_with("singl_"))),
    estado_conyugal_2_wid = base::rowSums(dplyr::select(., starts_with("widow_")))
  ) %>%
  # Number of statuses with at least one supporting imputation.
  dplyr::mutate(
    estado_conyugal_2_tot =
      as.numeric(estado_conyugal_2_married > 0) +
      as.numeric(estado_conyugal_2_sep_div > 0) +
      as.numeric(estado_conyugal_2_singl > 0) +
      as.numeric(estado_conyugal_2_wid > 0)
  ) %>%
  janitor::clean_names()
  
# For each row identifier, keep the marital status supported by the largest
# number of imputations. When several statuses share the maximum, the category
# is set to NA and a single row is kept for that identifier.
estado_conyugal_2_imputed_cat_est_cony <-
  estado_conyugal_2_imputed %>%
  tidyr::pivot_longer(
    c(
      estado_conyugal_2_married,
      estado_conyugal_2_sep_div,
      estado_conyugal_2_singl,
      estado_conyugal_2_wid
    ),
    names_to = "cat_est_conyugal",
    values_to = "count"
  ) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  # Keep only the status(es) that reach the per-row maximum count.
  dplyr::filter(count == max(count, na.rm = TRUE)) %>%
  # More than one surviving row for an identifier means a tie.
  dplyr::mutate(n_row = n()) %>%
  dplyr::ungroup() %>%
  dplyr::select(amelia_fit_imputations_imp1_row, cat_est_conyugal, count, n_row) %>%
  dplyr::mutate(
    cat_est_conyugal = dplyr::if_else(n_row > 1, NA_character_, cat_est_conyugal)
  ) %>%
  dplyr::distinct(amelia_fit_imputations_imp1_row, .keep_all = TRUE)
  
# Join the selected category back onto the per-row table and translate the
# count-column name into the marital-status label. Ties (NA) stay NA.
estado_conyugal_2_imputed <-
  estado_conyugal_2_imputed %>%
  dplyr::left_join(
    estado_conyugal_2_imputed_cat_est_cony,
    by = "amelia_fit_imputations_imp1_row"
  ) %>%
  dplyr::mutate(
    # Named lookup: any value not listed here maps to NA.
    cat_est_conyugal = unname(c(
      estado_conyugal_2_married = "Married/Shared living arrangements",
      estado_conyugal_2_sep_div = "Separated/Divorced",
      estado_conyugal_2_singl = "Single",
      estado_conyugal_2_wid = "Widower"
    )[cat_est_conyugal])
  ) %>%
  janitor::clean_names()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

# Fill missing marital status with the category selected from the
# imputations (joined by row identifier); observed values are kept as they are.
CONS_C1_df_dup_SEP_2020_match_miss5 <-
  CONS_C1_df_dup_SEP_2020_match_miss4 %>%
  dplyr::left_join(
    dplyr::select(
      estado_conyugal_2_imputed,
      amelia_fit_imputations_imp1_row,
      cat_est_conyugal
    ),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    estado_conyugal_2 = factor(
      dplyr::if_else(
        is.na(estado_conyugal_2),
        as.character(cat_est_conyugal),
        as.character(estado_conyugal_2)
      )
    )
  ) %>%
  data.table()

# Distinct `hash_key` values that still have a missing marital status after
# the replacement step, flattened into a vector.
no_calzaron_estado_cony <-
  CONS_C1_df_dup_SEP_2020_match_miss5 %>%
  dplyr::filter(is.na(estado_conyugal_2)) %>%
  dplyr::distinct(hash_key) %>%
  unlist()

#CONS_C1_df_dup_SEP_2020_match_miss5 %>% 
#dplyr::filter(hash_key %in% CONS_C1_df_dup_SEP_2020_match_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist())


We could not resolve Marital status in 14 cases due to ties in the most frequent values.


Region & Type of Center (Public)

We reviewed the candidate imputed values for the region of the center (n=28) and the type of center (public or private) (n=28).


# See the different values proposed for the region and type of center.
# evaluacindelprocesoteraputico nombre_region tipo_centro_pub

# There is no information available, so these values must be imputed.
# `no_mostrar` is a switch: the cross-tabulation of `tipo_centro` by
# `nombre_region` (rows with missing region only) is built only when it is 1.
no_mostrar <- 0
if (no_mostrar == 1) {
  tipo_centro_nombre_region_nas_nombre_region <-
    janitor::tabyl(
      dplyr::filter(CONS_C1_df_dup_SEP_2020, is.na(nombre_region)),
      tipo_centro,
      nombre_region
    )
}

# Puts the row identifier of the first imputation next to the `nombre_region`
# column of each of the 30 imputations. The columns are named with the same
# "amelia_fit$imputations$imp<k>$..." strings that a spelled-out
# cbind.data.frame() call produces, because later steps select by those names.
nombre_region_imputed <- local({
  imp_ids <- paste0("imp", seq_len(30))
  columnas <- c(
    list(amelia_fit$imputations$imp1$row),
    lapply(imp_ids, function(id) amelia_fit$imputations[[id]]$nombre_region)
  )
  names(columnas) <- c(
    "amelia_fit$imputations$imp1$row",
    paste0("amelia_fit$imputations$", imp_ids, "$nombre_region")
  )
  do.call(cbind.data.frame, columnas)
})
# For every imputation column, flag (1/0) whether it proposes each region,
# count how many imputations support each region (`nombre_region_<code>`), and
# count how many different regions were proposed at least once
# (`nombre_region_tot`).
nombre_region_imputed <- local({
  # Region code -> substring that identifies the region name. The order of
  # this vector is the order in which the columns are appended.
  patrones_region <- c(
    "02" = "Antofagasta",
    "09" = "Araucan",
    "15" = "Arica",
    "03" = "Atacama",
    "11" = "Ays",
    "08" = "Biob",
    "04" = "Coquimbo",
    "10" = "Los Lagos",
    "14" = "Los R",
    "12" = "Magallanes",
    "07" = "Maule",
    "13" = "Metropolitana",
    "16" = "uble",
    "06" = "Higgins",
    "01" = "Tarapac",
    "05" = "Valpara"
  )
  codigos <- names(patrones_region)

  # data.frame() makes the column names syntactic ("$" becomes ".").
  resultado <- data.frame(nombre_region_imputed)

  # One block of 30 indicator columns per region: reg_<code>_<imputation column>.
  for (codigo in codigos) {
    patron <- patrones_region[[codigo]]
    resultado <- dplyr::mutate(
      resultado,
      across(
        c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),
        ~ as.numeric(grepl(patron, as.character(.))),
        .names = paste0("reg_", codigo, "_{col}")
      )
    )
  }

  # Number of imputations supporting each region.
  for (codigo in codigos) {
    resultado <- dplyr::mutate(
      resultado,
      !!paste0("nombre_region_", codigo) :=
        base::rowSums(
          dplyr::select(resultado, starts_with(paste0("reg_", codigo, "_")))
        )
    )
  }

  # Number of regions with at least one supporting imputation.
  resultado <- dplyr::mutate(
    resultado,
    nombre_region_tot = Reduce(
      `+`,
      lapply(
        paste0("nombre_region_", codigos),
        function(columna) as.numeric(resultado[[columna]] > 0)
      )
    )
  )

  janitor::clean_names(resultado)
})
  
# For each row identifier, keep the region supported by the largest number of
# imputations. When several regions share the maximum, the category is set to
# NA and a single row is kept for that identifier.
# All 16 region counts take part in the comparison. `nombre_region_16` was
# previously left out of the reshape even though its count is computed and its
# label is mapped afterwards, so it could never be selected.
nombre_region_imputed_cat_reg <-
    nombre_region_imputed %>%
        tidyr::pivot_longer(
          c(nombre_region_01, nombre_region_02, nombre_region_03, nombre_region_04,
            nombre_region_05, nombre_region_06, nombre_region_07, nombre_region_08,
            nombre_region_09, nombre_region_10, nombre_region_11, nombre_region_12,
            nombre_region_13, nombre_region_14, nombre_region_15, nombre_region_16),
          names_to = "cat_nombre_region",
          values_to = "count"
        ) %>%
        # Largest count per row identifier.
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
        dplyr::mutate(nombre_region_imputed_max = max(count, na.rm = TRUE)) %>%
        dplyr::ungroup() %>%
        dplyr::filter(nombre_region_imputed_max == count) %>%
        dplyr::select(amelia_fit_imputations_imp1_row, cat_nombre_region, count) %>%
        # More than one surviving row for an identifier means a tie.
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
        dplyr::mutate(n_row = n()) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(cat_nombre_region = dplyr::case_when(n_row > 1 ~ NA_character_,
                                                        TRUE ~ cat_nombre_region)) %>%
        dplyr::distinct(amelia_fit_imputations_imp1_row, .keep_all = TRUE)
  
# Join the selected category back onto the per-row table and translate the
# count-column name into the region label. Ties (NA) stay NA.
nombre_region_imputed <-
  nombre_region_imputed %>%
  dplyr::left_join(
    nombre_region_imputed_cat_reg,
    by = "amelia_fit_imputations_imp1_row"
  ) %>%
  dplyr::mutate(
    # Named lookup: any value not listed here maps to NA.
    cat_nombre_region = unname(c(
      nombre_region_01 = "Tarapacá (01)",
      nombre_region_02 = "Antofagasta (02)",
      nombre_region_03 = "Atacama (03)",
      nombre_region_04 = "Coquimbo (04)",
      nombre_region_05 = "Valparaíso (05)",
      nombre_region_06 = "O'Higgins (06)",
      nombre_region_07 = "Maule (07)",
      nombre_region_08 = "Biobío (08)",
      nombre_region_09 = "Araucanía (09)",
      nombre_region_10 = "Los Lagos (10)",
      nombre_region_11 = "Aysén (11)",
      nombre_region_12 = "Magallanes (12)",
      nombre_region_13 = "Metropolitana (13)",
      nombre_region_14 = "Los Ríos (14)",
      nombre_region_15 = "Arica (15)",
      nombre_region_16 = "Ñuble (16)"
    )[cat_nombre_region])
  ) %>%
  janitor::clean_names()

#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_
# Majority vote for `tipo_centro_pub` across the 30 Amelia imputations.
# Result: one row per record id with `tipo_centro_pub_to_imputation` equal to 1
# when more than 15 of the 30 imputed values are TRUE and 0 otherwise (an exact
# 15/15 split therefore resolves to 0).
# TRUE votes are counted inside summarise() rather than filtered beforehand:
# filtering first removed every record with zero TRUE votes, so those records
# had no row here and stayed NA after the downstream left join instead of
# being imputed as FALSE.
tipo_centro_pub_imputed <-
  data.frame(
    amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
    # one column per imputation (imp1 ... imp30)
    lapply(
      stats::setNames(nm = paste0("imp", 1:30)),
      function(imp) amelia_fit$imputations[[imp]]$tipo_centro_pub
    )
  ) %>%
  melt(id.vars = "amelia_fit_imputations_imp1_row") %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    tipo_centro_pub_to_imputation =
      as.numeric(sum(value == TRUE, na.rm = TRUE) > 15)
  )

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

# Fill gaps in region and public-centre status with the values selected from
# the imputations. Observed (non-NA) values are kept as they are; only NA
# entries take the imputed candidate.
CONS_C1_df_dup_SEP_2020_match_miss6 <-
  CONS_C1_df_dup_SEP_2020_match_miss5 %>%
  dplyr::left_join(
    dplyr::select(
      nombre_region_imputed,
      amelia_fit_imputations_imp1_row, cat_nombre_region
    ),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::left_join(
    dplyr::select(
      tipo_centro_pub_imputed,
      amelia_fit_imputations_imp1_row, tipo_centro_pub_to_imputation
    ),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    nombre_region = factor(dplyr::if_else(
      is.na(nombre_region),
      as.character(cat_nombre_region),
      as.character(nombre_region)
    )),
    tipo_centro_pub = factor(dplyr::if_else(
      is.na(tipo_centro_pub),
      as.logical(tipo_centro_pub_to_imputation),
      as.logical(tipo_centro_pub)
    ))
  ) %>%
  # helper columns are no longer needed once the gaps are filled
  dplyr::select(
    -c(cat_est_conyugal, cat_nombre_region, tipo_centro_pub_to_imputation)
  ) %>%
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))


It was impossible to impute the region of the center in 6 cases due to ties among the imputed values. For the public/private center variable, no missing values remained after imputation.


Diagnosis of Drug Consumption

We looked over possible imputations to the diagnosis of drug consumption (n=1).


# Inspect the values proposed for the drug-consumption diagnosis
# (`dg_trs_cons_sus_or`) across the 30 Amelia imputations.
# Result: one row per record id with `dg_trs_cons_imputation` equal to 1 when
# more than 15 of the 30 imputed values are TRUE and 0 otherwise (an exact
# 15/15 split resolves to 0).
# TRUE votes are counted inside summarise() rather than filtered beforehand:
# filtering first removed every record with zero TRUE votes, leaving it NA
# after the downstream left join instead of imputing FALSE.

dg_trs_cons_sus_or_imputed <-
  data.frame(
    amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
    # one column per imputation (imp1 ... imp30)
    lapply(
      stats::setNames(nm = paste0("imp", 1:30)),
      function(imp) amelia_fit$imputations[[imp]]$dg_trs_cons_sus_or
    )
  ) %>%
  melt(id.vars = "amelia_fit_imputations_imp1_row") %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    dg_trs_cons_imputation =
      as.numeric(sum(value == TRUE, na.rm = TRUE) > 15)
  )

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

# Fill NA diagnoses with the value selected from the imputations; observed
# values pass through unchanged. The helper column is dropped afterwards.
CONS_C1_df_dup_SEP_2020_match_miss7 <-
  CONS_C1_df_dup_SEP_2020_match_miss6 %>%
  dplyr::left_join(
    dplyr::select(
      dg_trs_cons_sus_or_imputed,
      amelia_fit_imputations_imp1_row, dg_trs_cons_imputation
    ),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    dg_trs_cons_sus_or = factor(dplyr::if_else(
      is.na(dg_trs_cons_sus_or),
      as.logical(dg_trs_cons_imputation),
      as.logical(dg_trs_cons_sus_or)
    ))
  ) %>%
  dplyr::select(-dg_trs_cons_imputation) %>%
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))


Cause of Discharge

We looked over possible imputations for the truly missing values, excluding values that were missing due to censoring (n=20).

# Row ids whose cause of discharge is missing although a discharge date exists.
# NOTE(review): the join has no `by`, so it matches on every column the two
# tables share; confirm that this is intended before pinning it to `row`.
motivo_de_egreso_a_imputar <-
  CONS_C1_df_dup_SEP_2020_match_miss %>%
  dplyr::filter(is.na(motivodeegreso_mod_imp)) %>%
  dplyr::left_join(
    dplyr::select(CONS_C1_df_dup_SEP_2020, row, fech_egres_imp)
  ) %>%
  dplyr::filter(!is.na(fech_egres_imp)) %>%
  dplyr::select(row)

# Choose an imputed cause of discharge for the records listed in
# `motivo_de_egreso_a_imputar`.
#
# Steps:
#   1. Stack the 30 Amelia imputations of `motivodeegreso_mod_imp` (long form).
#   2. Discard illogical candidates: "Death" proposed for a record that has
#      later treatments (`duplicates_filtered > dup`).
#   3. Count how often each remaining cause was proposed and keep the most
#      frequent one(s) per record.
#   4. Flag ties (`ties == 1` when more than one cause shares the top count).
#   5. Impute only untied records discharged before 2019-11-13; everything
#      else stays NA.
#
# Fix: step 3 previously used slice_min(), which kept the LEAST frequently
# imputed cause. Every other variable in this file is imputed from the
# majority of the imputations, so the plurality winner is now taken with
# slice_max() (ties are still kept and flagged).
motivodeegreso_mod_imp_imputed <-
  data.frame(
    amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
    # one column per imputation (imp1 ... imp30)
    lapply(
      stats::setNames(nm = paste0("imp", 1:30)),
      function(imp) amelia_fit$imputations[[imp]]$motivodeegreso_mod_imp
    )
  ) %>%
  melt(id.vars = "amelia_fit_imputations_imp1_row") %>%
  dplyr::filter(
    amelia_fit_imputations_imp1_row %in% unlist(motivo_de_egreso_a_imputar$row)
  ) %>%
  # Filter out illogical cases: deaths with subsequent treatments.
  dplyr::left_join(
    dplyr::select(CONS_C1_df_dup_SEP_2020, row, dup, duplicates_filtered),
    by = c("amelia_fit_imputations_imp1_row" = "row")
  ) %>%
  # case_when keeps rows whose comparison is NA (they fall through to 0)
  dplyr::mutate(
    value_death = dplyr::case_when(
      value == "Death" & duplicates_filtered > dup ~ 1,
      TRUE ~ 0
    )
  ) %>%
  dplyr::filter(value_death != 1) %>%
  dplyr::count(amelia_fit_imputations_imp1_row, value) %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  # most frequent candidate(s); with_ties = TRUE (default) keeps all ties
  dplyr::slice_max(n, n = 1) %>%
  dplyr::summarise(
    adm_dis = sum(value == "Administrative discharge", na.rm = TRUE),
    death = sum(value == "Death", na.rm = TRUE),
    referral = sum(value == "Referral to another treatment", na.rm = TRUE),
    ter_dis = sum(value == "Therapeutic discharge", na.rm = TRUE),
    dropout = sum(value == "Drop-out", na.rm = TRUE)
  ) %>%
  # more than one cause left after the selection above means a tie
  dplyr::mutate(
    ties = ifelse(adm_dis + death + referral + ter_dis + dropout > 1, 1, 0)
  ) %>%
  dplyr::left_join(
    dplyr::select(
      CONS_C1_df_dup_SEP_2020,
      row, motivodeegreso_mod_imp, fech_egres_imp, fech_egres_num, dup,
      duplicates_filtered, evaluacindelprocesoteraputico, tipo_centro_derivacion
    ),
    by = c("amelia_fit_imputations_imp1_row" = "row")
  ) %>%
  dplyr::mutate(
    motivodeegreso_mod_imp_imputation = dplyr::case_when(
      ties == 0 & adm_dis == 1 & fech_egres_imp < "2019-11-13" ~
        "Administrative discharge",
      # Death is an absorbing state: it must not have posterior treatments.
      ties == 0 & death == 1 & fech_egres_imp < "2019-11-13" &
        dup == duplicates_filtered ~ "Death",
      ties == 0 & referral == 1 & fech_egres_imp < "2019-11-13" ~
        "Referral to another treatment",
      ties == 0 & ter_dis == 1 & fech_egres_imp < "2019-11-13" ~
        "Therapeutic discharge",
      ties == 0 & dropout == 1 & fech_egres_imp < "2019-11-13" ~
        "Drop-out",
      # A discharge date on/after 2019-11-13 can only be an ongoing
      # treatment, so it is left NA, as is every other unmatched case.
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::rename("motivodeegreso_mod_imp_original" = "motivodeegreso_mod_imp")

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
# Fill NA causes of discharge with the imputed candidate, blank rows whose
# joined original value is "Ongoing treatment", and finally re-attach the
# original (pre-imputation) cause from the source table for later checks.
CONS_C1_df_dup_SEP_2020_match_miss8 <-
  CONS_C1_df_dup_SEP_2020_match_miss7 %>%
  dplyr::left_join(
    motivodeegreso_mod_imp_imputed[, c(
      "amelia_fit_imputations_imp1_row",
      "motivodeegreso_mod_imp_original",
      "fech_egres_imp",
      "fech_egres_num",
      "motivodeegreso_mod_imp_imputation"
    )],
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    motivodeegreso_mod_imp = factor(dplyr::case_when(
      is.na(motivodeegreso_mod_imp) ~ motivodeegreso_mod_imp_imputation,
      motivodeegreso_mod_imp_original == "Ongoing treatment" ~ NA_character_,
      TRUE ~ as.character(motivodeegreso_mod_imp)
    ))
  ) %>%
  # drop the helper columns brought in by the join above
  dplyr::select(-c(
    motivodeegreso_mod_imp_imputation,
    fech_egres_imp,
    fech_egres_num,
    motivodeegreso_mod_imp_original
  )) %>%
  dplyr::left_join(
    dplyr::select(
      CONS_C1_df_dup_SEP_2020,
      row,
      motivodeegreso_mod_imp_original = motivodeegreso_mod_imp
    ),
    by = "row"
  ) %>%
  data.table()

# CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp,motivodeegreso_mod_imp_original)
#CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp_original)

#
# Print a notice when a record that was not originally "Ongoing treatment"
# still lacks a cause of discharge after imputation.
if (nrow(dplyr::filter(
  CONS_C1_df_dup_SEP_2020_match_miss8,
  motivodeegreso_mod_imp_original != "Ongoing treatment",
  is.na(motivodeegreso_mod_imp)
)) > 0) {
  "There are missing values on the cause of discharge"
}


A total of 3 cases were not imputed due to ties in the imputed values.


Evaluation of the Therapeutic Process

Another variable worth imputing is the Evaluation of the Therapeutic Process at Discharge (n= 7,378). In case of ties, we selected the imputed value with the minimum evaluation. It must be considered that most of the null values could be explained by censoring or by treatments that were not completed within the study period (n= 7,361).


# Inspect the values proposed for the evaluation of the therapeutic process
# across the 30 Amelia imputations.
# For each record, count how often each achievement level was proposed and
# keep the level with strictly more votes than both of the others. When no
# level beats both others (a tie for first place) the imputation is left NA.
#
# Fix: the "1-High Achievement" branch compared `med_ach_2 > min_ach_3`
# instead of `high_ach_1 > min_ach_3`, so a clear High plurality was discarded
# whenever Medium did not also beat Minimum (e.g. 20 / 3 / 7 votes).
evaluacindelprocesoteraputico_imputed <-
  data.frame(
    amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
    # one column per imputation (imp1 ... imp30)
    lapply(
      stats::setNames(nm = paste0("imp", 1:30)),
      function(imp) amelia_fit$imputations[[imp]]$evaluacindelprocesoteraputico
    )
  ) %>%
  melt(id.vars = "amelia_fit_imputations_imp1_row") %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    high_ach_1 = sum(value == "1-High Achievement", na.rm = TRUE),
    med_ach_2 = sum(value == "2-Medium Achievement", na.rm = TRUE),
    min_ach_3 = sum(value == "3-Minimum Achievement", na.rm = TRUE)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(
    evaluacindelprocesoteraputico_imputation = dplyr::case_when(
      (high_ach_1 > med_ach_2) & (high_ach_1 > min_ach_3) ~ "1-High Achievement",
      (med_ach_2 > high_ach_1) & (med_ach_2 > min_ach_3) ~ "2-Medium Achievement",
      (min_ach_3 > med_ach_2) & (min_ach_3 > high_ach_1) ~ "3-Minimum Achievement"
    )
  )

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
##
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)

# Fill the evaluation of the therapeutic process:
#   * NA evaluation with one of the four listed discharge causes -> imputed value
#   * no cause of discharge                                      -> NA
#   * anything else                                              -> unchanged
# The result is stored as an ordered factor (High > Medium > Minimum order of
# the declared levels).
CONS_C1_df_dup_SEP_2020_match_miss9 <-
  CONS_C1_df_dup_SEP_2020_match_miss8 %>%
  dplyr::left_join(
    evaluacindelprocesoteraputico_imputed[, c(
      "amelia_fit_imputations_imp1_row",
      "evaluacindelprocesoteraputico_imputation"
    )],
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    evaluacindelprocesoteraputico = parse_factor(
      dplyr::case_when(
        is.na(evaluacindelprocesoteraputico) &
          motivodeegreso_mod_imp %in% c(
            "Drop-out",
            "Administrative discharge",
            "Therapeutic discharge",
            "Referral to another treatment"
          ) ~ evaluacindelprocesoteraputico_imputation,
        is.na(motivodeegreso_mod_imp) ~ NA_character_,
        TRUE ~ as.character(evaluacindelprocesoteraputico)
      ),
      levels = c(
        "1-High Achievement",
        "2-Medium Achievement",
        "3-Minimum Achievement"
      ),
      ordered = TRUE,
      trim_ws = TRUE,
      include_na = FALSE,
      locale = locale(encoding = "UTF-8")
    )
  ) %>%
  dplyr::select(-evaluacindelprocesoteraputico_imputation) %>%
  data.table()

# Cross-tabulate cause of discharge against the evaluation of the therapeutic
# process and render the counts as a scrollable HTML table.
CONS_C1_df_dup_SEP_2020_match_miss9 %>%
  janitor::tabyl(motivodeegreso_mod_imp, evaluacindelprocesoteraputico) %>%
  knitr::kable(
    format = "html",
    format.args = list(decimal.mark = ".", big.mark = ","),
    caption = "Table 2. Cause of Discharge vs. Evaluation of the Therapeutic Procress",
    col.names = c(
      "Cause of Discharge",
      "1-High Achievement",
      "2- Medium Achievement",
      "3- Minimum Achievement",
      "Null Values"
    ),
    align = rep("c", 101)
  ) %>%
  kableExtra::kable_styling(
    bootstrap_options = c("striped", "hover"),
    font_size = 9
  ) %>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 2. Cause of Discharge vs. Evaluation of the Therapeutic Procress
Cause of Discharge 1-High Achievement 2- Medium Achievement 3- Minimum Achievement Null Values
Administrative discharge 867 4,427 4,488 0
Death 0 0 1 0
Drop-out 1,767 16,839 37,301 0
Referral to another treatment 1,298 5,835 4,705 0
Therapeutic discharge 17,120 6,135 1,118 1
NA 0 0 0 7,854


As seen in the table above, ongoing treatments did not have an evaluation of the therapeutic process, which is logically valid, since their treatment completion was not captured.


Treatment Setting (Residential)

We looked over possible imputations to the treatment setting (n=97).


# Inspect the values proposed for the treatment setting (`tipo_de_plan_res`)
# across the 30 Amelia imputations.
# Result: one row per record id with the number of imputations that proposed
# "1" (`n_res`) and the number that proposed "0" (`n_amb`).

tipo_de_plan_res_imputed <-
  data.frame(
    amelia_fit_imputations_imp1_row = amelia_fit$imputations$imp1$row,
    # one column per imputation (imp1 ... imp30)
    lapply(
      stats::setNames(nm = paste0("imp", 1:30)),
      function(imp) amelia_fit$imputations[[imp]]$tipo_de_plan_res
    )
  ) %>%
  melt(id.vars = "amelia_fit_imputations_imp1_row") %>%
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(
    n_res = sum(value == "1", na.rm = TRUE),
    n_amb = sum(value == "0", na.rm = TRUE)
  )

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

# Fill NA treatment settings with whichever value ("1" or "0") was proposed
# more often across the imputations; equal counts leave the value untouched.
CONS_C1_df_dup_SEP_2020_match_miss10 <-
  CONS_C1_df_dup_SEP_2020_match_miss9 %>%
  dplyr::left_join(
    dplyr::select(
      tipo_de_plan_res_imputed,
      amelia_fit_imputations_imp1_row, n_res, n_amb
    ),
    by = c("row" = "amelia_fit_imputations_imp1_row")
  ) %>%
  dplyr::mutate(
    tipo_de_plan_res = factor(dplyr::case_when(
      is.na(tipo_de_plan_res) & (n_res > n_amb) ~ "1",
      is.na(tipo_de_plan_res) & (n_res < n_amb) ~ "0",
      TRUE ~ as.character(tipo_de_plan_res)
    ))
  ) %>%
  dplyr::select(-c(n_res, n_amb)) %>%
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))

As a result of the imputation process, some values could not be imputed (n=97).


Sample Characteristics

We checked the characteristics of the sample depending on type of treatment (Residential or Outpatients).


#prop.table(table(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res),2)
# Identifier, covariate, treatment and outcome columns considered for the
# complete-case selection.
match.on_tot <- c(
  "row",
  "hash_key",
  "sus_ini_mod_mvv",
  "estado_conyugal_2",
  "escolaridad_rec",
  "edad_ini_cons",
  "freq_cons_sus_prin",
  "origen_ingreso_mod",
  "dg_cie_10_rec",
  "nombre_region",
  "tipo_centro_pub",
  "abandono_temprano_rec",
  "evaluacindelprocesoteraputico",
  "motivodeegreso_mod_imp",
  "dg_trs_cons_sus_or",
  "tipo_de_plan_res",
  "sexo_2",
  "edad_al_ing",
  "fech_ing_num"
)
#$109,756
#añado los imputados
# Replace the variables that went through imputation with their imputed
# versions, recode the cause of discharge, and count per user (`hash_key`) how
# many of the analysis variables are still missing (`sum_miss`).
CONS_C1_df_dup_SEP_2020_match_miss_after_imp <-
  CONS_C1_df_dup_SEP_2020_match_miss %>%
  # drop the pre-imputation versions
  dplyr::select(-c(
    sus_ini_mod_mvv, estado_conyugal_2, escolaridad_rec, freq_cons_sus_prin,
    nombre_region, tipo_centro_pub, evaluacindelprocesoteraputico,
    motivodeegreso_mod_imp, dg_trs_cons_sus_or, tipo_de_plan_res,
    edad_ini_cons, via_adm_sus_prin_act
  )) %>%
  # add the imputed versions
  dplyr::left_join(
    dplyr::select(
      CONS_C1_df_dup_SEP_2020_match_miss10,
      row, sus_ini_mod_mvv, estado_conyugal_2, escolaridad_rec,
      freq_cons_sus_prin, nombre_region, tipo_centro_pub,
      evaluacindelprocesoteraputico, motivodeegreso_mod_imp,
      dg_trs_cons_sus_or, tipo_de_plan_res, edad_ini_cons, rn
    ),
    by = "row"
  ) %>%
  dplyr::arrange(tipo_de_plan_res, hash_key, rn) %>%
  # this variable is dropped because it is accessory
  dplyr::select(-edad_ini_sus_prin) %>%
  # discharge date and days in treatment, needed to recode the cause of discharge
  dplyr::left_join(
    dplyr::select(
      CONS_C1_df_dup_SEP_2020,
      row, fech_egres_num, dias_treat_imp_sin_na
    ),
    by = "row"
  ) %>%
  # 18213 is 2019-11-13 counted in days since 1970-01-01
  # (presumably the censoring date -- confirm)
  dplyr::mutate(
    motivodeegreso_mod_imp = dplyr::case_when(
      dias_treat_imp_sin_na >= 90 & motivodeegreso_mod_imp == "Drop-out" ~
        "Late Drop-out",
      dias_treat_imp_sin_na < 90 & motivodeegreso_mod_imp == "Drop-out" ~
        "Early Drop-out",
      fech_egres_num == 18213 & is.na(motivodeegreso_mod_imp) ~
        "Ongoing treatment",
      TRUE ~ as.character(motivodeegreso_mod_imp)
    ),
    evaluacindelprocesoteraputico2 = dplyr::case_when(
      fech_egres_num == 18213 & is.na(evaluacindelprocesoteraputico) ~
        "Ongoing treatment",
      TRUE ~ as.character(evaluacindelprocesoteraputico)
    )
  ) %>%
  # missing analysis variables per row ...
  dplyr::mutate(
    sum_miss = base::rowSums(is.na(dplyr::select(., c(
      "sus_ini_mod_mvv", "estado_conyugal_2", "escolaridad_rec",
      "freq_cons_sus_prin", "nombre_region", "tipo_centro_pub",
      "evaluacindelprocesoteraputico2", "motivodeegreso_mod_imp",
      "dg_trs_cons_sus_or", "tipo_de_plan_res", "edad_ini_cons",
      "sexo_2", "edad_al_ing", "fech_ing_num"
    ))))
  ) %>%
  # ... then totalled over all rows of the same user
  dplyr::group_by(hash_key) %>%
  dplyr::mutate(sum_miss = sum(sum_miss)) %>%
  dplyr::ungroup()

# Discarded: users with at least one missing analysis value after imputation.
CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados <-
  dplyr::filter(CONS_C1_df_dup_SEP_2020_match_miss_after_imp, sum_miss > 0)

# Retained: users with no missing analysis values. Occupational status is
# attached from the source table and the two helper columns are removed.
CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados <-
  dplyr::filter(CONS_C1_df_dup_SEP_2020_match_miss_after_imp, sum_miss == 0) %>%
  dplyr::left_join(
    CONS_C1_df_dup_SEP_2020[c("row","condicion_ocupacional_corr")],
    by = "row"
  ) %>%
  dplyr::select(-c(sum_miss, evaluacindelprocesoteraputico2))

#  CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados[complete.cases(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados[,..match.on_tot]),..match.on_tot] 


Considering that some missing values could not be imputed (due to ties in the candidate values or to inconsistent imputed values) (337, users=272), we ended the process with 109,419 complete cases (users=84,776).


#' Render a printable summary table (e.g. a tableone object) as an HTML kable.
#'
#' Relies on `print(x, ...)` returning the formatted table, which is then
#' handed to `knitr::kable()`.
#'
#' @param x Object to print and render.
#' @param ... Arguments forwarded to `print(x, ...)`.
#' @param caption Optional table caption, forwarded to `knitr::kable()`.
#'   Previously a `caption` supplied by the caller travelled through `...`
#'   into `print()` and never reached the rendered table.
#' @param col.names Optional header labels. A vector with one label more than
#'   the printed table has columns is taken to start with the heading of the
#'   row-name column; otherwise it is forwarded to `knitr::kable()` unchanged.
#'   Previously ignored for the same reason as `caption`.
#' @return The value of `knitr::kable()` (an HTML table).
kableone <- function(x, ..., caption = NULL, col.names = NA) {
  # print() draws the table on the console as a side effect;
  # capture.output() swallows that copy so only the kable is emitted.
  capture.output(x <- print(x, ...))

  kable_args <- list(
    format = "html",
    format.args = list(decimal.mark = ".", big.mark = ",")
  )
  # Only override kable's own caption default when one was actually given.
  if (!is.null(caption)) {
    kable_args$caption <- caption
  }
  if (!identical(col.names, NA)) {
    if (!is.null(rownames(x)) && length(col.names) == ncol(x) + 1L) {
      # Move the row names into a real first column so that its heading can
      # be set together with the others.
      x <- cbind(rownames(x), x)
      rownames(x) <- NULL
      colnames(x) <- col.names
    } else {
      kable_args$col.names <- col.names
    }
  }
  do.call(knitr::kable, c(list(x), kable_args))
}
# Covariates summarised in the balance tables.
match.on.sel <- c(
  "sus_ini_mod_mvv",
  "estado_conyugal_2",
  "escolaridad_rec",
  "edad_ini_cons",
  "freq_cons_sus_prin",
  "origen_ingreso_mod",
  "dg_cie_10_rec",
  "nombre_region",
  "dg_trs_cons_sus_or",
  "tipo_centro_pub",
  "sexo_2",
  "edad_al_ing",
  "fech_ing_num",
  "condicion_ocupacional_corr"
)
# Variables declared as categorical (passed as `factorVars` below).
catVars <- c(
  "sus_ini_mod_mvv",
  "estado_conyugal_2",
  "escolaridad_rec",
  "tipo_centro_pub",
  "freq_cons_sus_prin",
  "origen_ingreso_mod",
  "dg_cie_10_rec",
  "dg_trs_cons_sus_or",
  "nombre_region",
  "tipo_de_plan_res",
  "sexo_2",
  "condicion_ocupacional_corr"
)
#length(unique(CONS_C1_df_dup_SEP_2020_match$fech_ing_num))
#:#:#:#:#: DISMINUIR LA HETEROGENEIDAD DE LA FECHA DE INGRESO
# FORMAS DE CONSTREÑIR LA VARIABLE:
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-cut(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,100)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-CONS_C1_df_dup_SEP_2020_match_fech_ing_num
#CONS_C1_df_dup_SEP_2020_match_fech_ing_num<-CONS_C1_df_dup_SEP_2020_match$fech_ing_num
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,0)))
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)))

#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#:#:#:#:#: 

# Sanity check: number of retained rows where `dup` and `rn` disagree.
paste0(
  "Inconsistencies in dup vs. rn: ",
  nrow(dplyr::filter(
    CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados,
    dup != rn
  ))
)
## [1] "Inconsistencies in dup vs. rn: 0"
# Keep only rows with dup == 1 (presumably each user's first treatment --
# confirm) and drop helper columns that are not needed for the balance tables.
CONS_C1_df_dup_SEP_2020_match_not_miss2 <-
  dplyr::filter(
    CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados,
    dup == 1
  ) %>%
  dplyr::select(-c(rn, dias_treat_imp_sin_na, fech_egres_num))

# Attach a human-readable "label" attribute to each analysis column. The table
# printed further down is requested with varLabels = TRUE and shows these
# labels in place of the raw column names.
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$sus_ini_mod_mvv,"label")<-"Starting Substance"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$edad_ini_cons,"label")<-"Age of Onset of Drug Use"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$freq_cons_sus_prin,"label")<-"Frequency of use of primary drug"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$nombre_region,"label")<-"Region of the Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_cie_10_rec,"label")<-"Psychiatric Comorbidity"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_trs_cons_sus_or,"label")<-"Drug Dependence"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$abandono_temprano_rec,"label")<-"Early Discharge"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,"label")<-"Residential"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_centro_pub,"label")<-"Public Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$condicion_ocupacional_corr,"label")<-"Occupational Status"

# Descriptive table of the selected covariates stratified by treatment setting,
# with standardized mean differences; the elapsed time is kept for reference.
pre_tab1 <- Sys.time()
tab1 <- CreateTableOne(
  vars = match.on.sel,
  strata = "tipo_de_plan_res",
  data = CONS_C1_df_dup_SEP_2020_match_not_miss2,
  factorVars = catVars,
  smd = TRUE
)
post_tab1 <- Sys.time()
diff_time_tab1 <- post_tab1 - pre_tab1

# Render the stratified descriptive table as a styled, scrollable HTML table.
# The three continuous variables are summarised as median [IQR].
kableone(
  tab1,
  caption = "Table 5. Covariate Balance in the Variables of Interest",
  col.names = c("Variables", "Ambulatory", "Residential", "p-values", "test", "SMD"),
  nonnormal = c("edad_ini_cons", "edad_al_ing", "fech_ing_num"), #"\\hline",
  smd = TRUE,
  test = TRUE,
  varLabels = TRUE,
  noSpaces = TRUE,
  printToggle = TRUE,
  dropEqual = FALSE
) %>%
  kableExtra::kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 10
  ) %>%
  row_spec(
    1,
    bold = TRUE,
    italic = TRUE,
    color = "black",
    hline_after = TRUE,
    extra_latex_after = "\\arrayrulecolor{white}",
    font_size = 10
  ) %>%
  #footnote(general = "Here is a general comments of the table. ",
  #        number = c("Footnote 1; ", "Footnote 2; "),
  #         alphabet = c("Footnote A; ", "Footnote B; "),
  #         symbol = c("Footnote Symbol 1; ", "Footnote Symbol 2")
  #         )%>%
  scroll_box(width = "100%", height = "400px")
0 1 p test SMD
n 72083 12693
Starting Substance (%) <0.001 0.369
Alcohol 41410 (57.4) 5074 (40.0)
Cocaine hydrochloride 2925 (4.1) 513 (4.0)
Cocaine paste 7671 (10.6) 2235 (17.6)
Marijuana 18417 (25.5) 4554 (35.9)
Other 1660 (2.3) 317 (2.5)
Marital Status (%) <0.001 0.309
Married/Shared living arrangements 26166 (36.3) 2911 (22.9)
Separated/Divorced 7713 (10.7) 1318 (10.4)
Single 37340 (51.8) 8332 (65.6)
Widower 864 (1.2) 132 (1.0)
Educational Attainment (%) <0.001 0.124
3-Completed primary school or less 21857 (30.3) 4572 (36.0)
2-Completed high school or less 37209 (51.6) 6136 (48.3)
1-More than high school 13017 (18.1) 1985 (15.6)
Age of Onset of Drug Use (median [IQR]) 15.00 [14.00, 18.00] 15.00 [13.00, 17.00] <0.001 nonnorm 0.090
Frequency of use of primary drug (%) <0.001 0.767
1 day a week or more 5323 (7.4) 273 (2.2)
2 to 3 days a week 22322 (31.0) 1323 (10.4)
4 to 6 days a week 12223 (17.0) 1649 (13.0)
Daily 28265 (39.2) 9231 (72.7)
Did not use 1094 (1.5) 84 (0.7)
Less than 1 day a week 2856 (4.0) 133 (1.0)
Origen de Ingreso (Primera Entrada)/Motive of Admission to Treatment (First Entry) (%) <0.001 0.509
Spontaneous 33648 (46.7) 4270 (33.6)
Assisted Referral 4933 (6.8) 3004 (23.7)
Other 3753 (5.2) 738 (5.8)
Justice Sector 7134 (9.9) 813 (6.4)
Health Sector 22615 (31.4) 3868 (30.5)
Psychiatric Comorbidity (%) <0.001 0.317
Without psychiatric comorbidity 29015 (40.3) 3247 (25.6)
Diagnosis unknown (under study) 13270 (18.4) 2763 (21.8)
With psychiatric comorbidity 29798 (41.3) 6683 (52.7)
Region of the Center (%) <0.001 0.388
Antofagasta (02) 2291 (3.2) 697 (5.5)
Araucanía (09) 2221 (3.1) 162 (1.3)
Arica (15) 1315 (1.8) 728 (5.7)
Atacama (03) 1831 (2.5) 258 (2.0)
Aysén (11) 797 (1.1) 42 (0.3)
Biobío (08) 5091 (7.1) 703 (5.5)
Coquimbo (04) 2798 (3.9) 268 (2.1)
Los Lagos (10) 2646 (3.7) 375 (3.0)
Los Ríos (14) 1113 (1.5) 185 (1.5)
Magallanes (12) 929 (1.3) 31 (0.2)
Maule (07) 4208 (5.8) 638 (5.0)
Metropolitana (13) 35961 (49.9) 6256 (49.3)
Ñuble (16) 540 (0.7) 20 (0.2)
O’Higgins (06) 3638 (5.0) 567 (4.5)
Tarapacá (01) 1350 (1.9) 598 (4.7)
Valparaíso (05) 5354 (7.4) 1165 (9.2)
Drug Dependence = TRUE (%) 50002 (69.4) 11645 (91.7) <0.001 0.589
Public Center = TRUE (%) 57121 (79.2) 3614 (28.5) <0.001 1.183
Sexo Usuario/Sex of User = Women (%) 17394 (24.1) 3937 (31.0) <0.001 0.155
Edad a la Fecha de Ingreso a Tratamiento (numérico continuo) (Primera Entrada)/Age at Admission to Treatment (First Entry) (median [IQR]) 34.43 [27.55, 43.46] 32.63 [26.34, 40.85] <0.001 nonnorm 0.185
Fecha de Ingreso a Tratamiento (Numérico)(c)/Date of Admission to Treatment (Numeric)(c) (median [IQR]) 16580.00 [15730.00, 17359.00] 16153.00 [15342.00, 17023.00] <0.001 nonnorm 0.293
Occupational Status (%) <0.001 1.025
Employed 39517 (54.8) 1771 (14.0)
Inactive 7674 (10.6) 1195 (9.4)
Looking for a job for the first time 172 (0.2) 20 (0.2)
No activity 2664 (3.7) 1820 (14.3)
Not seeking for work 492 (0.7) 335 (2.6)
Unemployed 21564 (29.9) 7552 (59.5)
#"tipo_de_plan_ambulatorio",
#https://cran.r-project.org/web/packages/tableone/vignettes/smd.html
#http://rstudio-pubs-static.s3.amazonaws.com/405765_2ce448f9bde24148a5f94c535a34b70e.html
#https://cran.r-project.org/web/packages/tableone/vignettes/introduction.html
#https://cran.r-project.org/web/packages/tableone/tableone.pdf
#https://www.rdocumentation.org/packages/tableone/versions/0.12.0/topics/CreateTableOne

## Construct a table 
#standardized mean differences of greater than 0.1


We checked the similarity of the samples using other measures, such as the variance ratio of the samples and Kolmogorov-Smirnov (KS) statistics.


library(cobalt)

# Unadjusted covariate balance between treatment groups: standardized mean
# differences, variance ratios and Kolmogorov-Smirnov statistics, with balance
# thresholds of 0.1 (mean differences) and 2 (variance ratios).
# Other statistic that could be requested: "ovl.coefficient".
bal2 <- bal.tab(
  CONS_C1_df_dup_SEP_2020_match_not_miss2[, match.on.sel],
  treat = CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,
  stats = c("mean.diffs", "variance.ratios", "ks.statistics"),
  binary = "std",
  continuous = "std",
  thresholds = c(m = .1, v = 2)
)

# Render missing cells as empty strings in the kable tables.
options(knitr.kable.NA = '')

# Round the three numeric statistic columns (positions 2, 4 and 6) to 2 decimals.
bal2$Balance[, c(2, 4, 6)] <- round(bal2$Balance[, c(2, 4, 6)], 2)

# Lookup list mapping covariate names (list names) to the human-readable labels
# (list values) shown in the balance tables. It is converted to a two-column
# table and joined on the `rn` column when the balance table is built.
# NOTE(review): the keys are presumably the row names produced by
# cobalt::bal.tab ("<variable>_<level>" for categorical covariates); a key that
# does not match exactly yields an empty label after the left join -- confirm.
var_names<- 
    list("origen_ingreso_mod_Spontaneous"="Motive Admission-Spontaneous",
         "origen_ingreso_mod_Assisted Referral"= "Motive Admission-Assisted Referral",
         "origen_ingreso_mod_Other"="Motive Admission-Other",
         "origen_ingreso_mod_Justice Sector"= "Motive Admission-Justice Sector",
         "origen_ingreso_mod_Health Sector"="Motive Admission-Health Sector",
         "dg_cie_10_rec_Without psychiatric comorbidity"="ICD-10-Wo/Psych Comorbidity",
         "dg_cie_10_rec_Diagnosis unknown (under study)"="ICD-10-Dg. Unknown/under study",
         "dg_cie_10_rec_With psychiatric comorbidity"="ICD-10-W/Psych Comorbidity",
         "sexo_2_Women"="Sex-Women",
         "edad_al_ing"="Age at Admission",
         "fech_ing_num"="Date of Admission",
         "duplicates_filtered"="Treatments (#)",
         "more_one_treat"=">1 treatment",
         "sus_ini_mod_mvv_Alcohol"= "Starting Substance-Alcohol",
         "sus_ini_mod_mvv_Cocaine hydrochloride"= "Starting Substance-Cocaine hydrochloride",
         "sus_ini_mod_mvv_Cocaine paste"="Starting Substance-Cocaine paste",
         "sus_ini_mod_mvv_Marijuana"="Starting Substance-Marijuana",
         "sus_ini_mod_mvv_Other"="Starting Substance-Other",
         "estado_conyugal_2_Married/Shared living arrangements"="Marital Status-Married/Shared liv. arr.",
         "condicion_ocupacional_corr_Employed"="Occ.Status-Employed",
         "condicion_ocupacional_corr_Inactive"="Occ.Status-Inactive",
         "condicion_ocupacional_corr_Looking for a job for the first time"="Occ.Status-Looking 1st job",
         "condicion_ocupacional_corr_No activity"="Occ.Status- No activity",
         "condicion_ocupacional_corr_Not seeking for work"="Occ.Status- Not seeking work",
         "condicion_ocupacional_corr_Unemployed"="Occ.Status- Unemployed",
         "estado_conyugal_2_Separated/Divorced"="Marital Status-Separated/Divorced",
         "estado_conyugal_2_Single"= "Marital Status-Single",
         "estado_conyugal_2_Widower"="Marital Status-Widower",
         "escolaridad_rec_3-Completed primary school or less"="Educational Attainment-PS or less",
         "escolaridad_rec_2-Completed high school or less"="Educational Attainment-HS or less",
         "escolaridad_rec_1-More than high school"="Educational Attainment-More than HS",
         "freq_cons_sus_prin_1 day a week or more"="Freq Drug Cons-1d/wk or more",
         "freq_cons_sus_prin_2 to 3 days a week"="Freq Drug Cons-2-3d/wk",
         "freq_cons_sus_prin_4 to 6 days a week"="Freq Drug Cons-4-6d/wk",
         "freq_cons_sus_prin_Daily"="Freq Drug Cons-Daily",
         "freq_cons_sus_prin_Did not use"="Freq Drug Cons-Did not use",
         "freq_cons_sus_prin_Less than 1 day a week"="Freq Drug Cons-Less 1d/wk",
         "nombre_region_Antofagasta (02)"="Region-Antofagasta(02)",
         "nombre_region_Araucanía (09)"="Region-Araucanía(09)",
         "nombre_region_Arica (15)"="Region-Arica(15)",
         "nombre_region_Atacama (03)"="Region-Atacama(03)",
         "nombre_region_Aysén (11)"="Region-Aysén(11)",
         "nombre_region_Biobío (08)"="Region- Biobío(08)",
         "nombre_region_Coquimbo (04)"="Region-Coquimbo(04)",
         "nombre_region_Los Lagos (10)"="Region-Los Lagos(10)",
         "nombre_region_Los Ríos (14)"="Region-Los Ríos(14)",
         "nombre_region_Magallanes (12)"="Region-Magallanes(12)",
         "nombre_region_Maule (07)"="Region-Maule(07)",
         "nombre_region_Metropolitana (13)"="Region-Metropolitana(13)",
         "nombre_region_Ñuble (16)"="Region-Ñuble(16)",
         "nombre_region_O'Higgins (06)"="Region-O'Higgins(06)",
         "nombre_region_Tarapacá (01)"="Region-Tarapacá(01)",
         "nombre_region_Valparaíso (05)"="Region-Valparaíso(05)",
         "tipo_centro_pub"="Public Center",
         "dg_trs_cons_sus_or"= "Drug Dependence",
         "edad_ini_cons"="Age of Onset of Drug Use",
         "rn"="Treatment")

# Two-column lookup table: `rn` (covariate name) and `unlist_var_names` (label).
# The inner expression is kept exactly as `data.frame(unlist(var_names))`
# because the label column's name is derived from that expression.
var.names <- janitor::clean_names(
  data.table(data.frame(unlist(var_names)), keep.rownames = TRUE)
)

# Unadjusted balance statistics ordered by decreasing absolute SMD, with the
# readable label as first column in place of the raw covariate name.
balance_prev <-
  data.table::data.table(bal2$Balance[, 1:6], keep.rownames = TRUE) %>%
  dplyr::arrange(dplyr::desc(abs(Diff.Un))) %>%
  dplyr::left_join(var.names, by = "rn") %>%
  dplyr::select(unlist_var_names, everything(), -rn)

# Render the balance table as a scrollable HTML table.
knitr::kable(
  balance_prev,
  format = "html",
  format.args = list(decimal.mark = ".", big.mark = ","),
  caption = "Table 4. Covariate Balance in the Variables of Interest",
  col.names = c("Variables","Nature of Variables", "Unadjusted SMDs","Threshold","Unadjusted Variance Ratios","Threshold","Unadjusted KS"),
  align = rep('c', 101)
) %>%
  kableExtra::kable_styling(
    bootstrap_options = c("striped", "hover"),
    font_size = 10
  ) %>%
  kableExtra::add_footnote("Note. ", notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 4. Covariate Balance in the Variables of Interest
Variables Nature of Variables Unadjusted SMDs Threshold Unadjusted Variance Ratios Threshold Unadjusted KS
Public Center Binary -1.18 Not Balanced, >0.1 0.51
Occ.Status-Employed Binary -0.95 Not Balanced, >0.1 0.41
Freq Drug Cons-Daily Binary 0.72 Not Balanced, >0.1 0.34
Occ.Status- Unemployed Binary 0.62 Not Balanced, >0.1 0.30
Drug Dependence Binary 0.59 Not Balanced, >0.1 0.22
Freq Drug Cons-2-3d/wk Binary -0.52 Not Balanced, >0.1 0.21
Motive Admission-Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17
Occ.Status- No activity Binary 0.38 Not Balanced, >0.1 0.11
Starting Substance-Alcohol Binary -0.36 Not Balanced, >0.1 0.17
ICD-10-Wo/Psych Comorbidity Binary -0.32 Not Balanced, >0.1 0.15
Marital Status-Married/Shared liv. arr. Binary -0.30 Not Balanced, >0.1 0.13
Date of Admission Contin. -0.29 Not Balanced, >0.1 1.00 Balanced, <2 0.14
Marital Status-Single Binary 0.28 Not Balanced, >0.1 0.14
Motive Admission-Spontaneous Binary -0.27 Not Balanced, >0.1 0.13
Freq Drug Cons-1d/wk or more Binary -0.25 Not Balanced, >0.1 0.05
Starting Substance-Marijuana Binary 0.23 Not Balanced, >0.1 0.10
ICD-10-W/Psych Comorbidity Binary 0.23 Not Balanced, >0.1 0.11
Region-Arica(15) Binary 0.21 Not Balanced, >0.1 0.04
Starting Substance-Cocaine paste Binary 0.20 Not Balanced, >0.1 0.07
Freq Drug Cons-Less 1d/wk Binary -0.19 Not Balanced, >0.1 0.03
Age at Admission Contin. -0.19 Not Balanced, >0.1 0.84 Balanced, <2 0.07
Region-Tarapacá(01) Binary 0.16 Not Balanced, >0.1 0.03
Sex-Women Binary 0.15 Not Balanced, >0.1 0.07
Occ.Status- Not seeking work Binary 0.15 Not Balanced, >0.1 0.02
Motive Admission-Justice Sector Binary -0.13 Not Balanced, >0.1 0.03
Educational Attainment-PS or less Binary 0.12 Not Balanced, >0.1 0.06
Region-Araucanía(09) Binary -0.12 Not Balanced, >0.1 0.02
Region-Magallanes(12) Binary -0.12 Not Balanced, >0.1 0.01
Freq Drug Cons-4-6d/wk Binary -0.11 Not Balanced, >0.1 0.04
Region-Antofagasta(02) Binary 0.11 Not Balanced, >0.1 0.02
Region-Coquimbo(04) Binary -0.10 Not Balanced, >0.1 0.02
Age of Onset of Drug Use Contin. -0.09 Balanced, <0.1 0.91 Balanced, <2 0.07
Region-Aysén(11) Binary -0.09 Balanced, <0.1 0.01
Region-Ñuble(16) Binary -0.09 Balanced, <0.1 0.01
Freq Drug Cons-Did not use Binary -0.08 Balanced, <0.1 0.01
ICD-10-Dg. Unknown/under study Binary 0.08 Balanced, <0.1 0.03
Educational Attainment-HS or less Binary -0.07 Balanced, <0.1 0.03
Educational Attainment-More than HS Binary -0.06 Balanced, <0.1 0.02
Region- Biobío(08) Binary -0.06 Balanced, <0.1 0.02
Region-Valparaíso(05) Binary 0.06 Balanced, <0.1 0.02
Region-Los Lagos(10) Binary -0.04 Balanced, <0.1 0.01
Region-Maule(07) Binary -0.04 Balanced, <0.1 0.01
Occ.Status-Inactive Binary -0.04 Balanced, <0.1 0.01
Motive Admission-Other Binary 0.03 Balanced, <0.1 0.01
Region-Atacama(03) Binary -0.03 Balanced, <0.1 0.01
Region-O’Higgins(06) Binary -0.03 Balanced, <0.1 0.01
Marital Status-Widower Binary -0.02 Balanced, <0.1 0.00
Motive Admission-Health Sector Binary -0.02 Balanced, <0.1 0.01
Occ.Status-Looking 1st job Binary -0.02 Balanced, <0.1 0.00
Starting Substance-Other Binary 0.01 Balanced, <0.1 0.00
Marital Status-Separated/Divorced Binary -0.01 Balanced, <0.1 0.00
Region-Los Ríos(14) Binary -0.01 Balanced, <0.1 0.00
Region-Metropolitana(13) Binary -0.01 Balanced, <0.1 0.01
Starting Substance-Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00
Note.


We generated a plot to focus on unbalanced data.


Figure 8. Covariates Balance on Different Values

Figure 8. Covariates Balance on Different Values

Specification

First, we discretized the categorical variables into binary (0/1) indicator variables, and we divided each continuous covariate into 20 quantile-based groups.


# Names of the categorical covariates that the loop over `catVars` below expands
# into 0/1 indicator columns. The list includes the treatment indicator
# `tipo_de_plan_res`.
catVars<-
c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","tipo_centro_pub","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","dg_trs_cons_sus_or","nombre_region","tipo_de_plan_res","sexo_2","condicion_ocupacional_corr")
# columna_dummy(df, columna): expands one column into indicator columns.
#   df      - data frame to transform.
#   columna - name of the column to expand, given as a string.
# Returns `df` with `columna` replaced by one column per distinct value, named
# "<columna>_<value>", holding 1 where the row had that value and 0 otherwise.
# NOTE(review): `eval(as.symbol(columna))` presumably resolves to the values of
# the column being mutated, and `spread(key = columna)` to the column named by
# the string stored in `columna` -- confirm; both rely on non-standard evaluation.
columna_dummy <- function(df, columna) {
  df %>% 
  # Prefix every value with the column name, e.g. "sexo_2_Women".
  mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>% 
    # Helper column holding the value written into the indicator cells.
    mutate(valor = 1) %>% 
    # One column per prefixed value; combinations that do not occur get 0.
    spread(key = columna, value = valor, fill = 0)
}

# quantiles(covar, n_q): assigns each element of a numeric vector to one of
# `n_q` quantile-based bins.
#   covar - numeric vector (NA values are ignored when computing the cut points
#           and stay NA in the result).
#   n_q   - number of bins.
# Returns an integer vector of bin indices (1..n_q), same length as `covar`.
# Bins are closed on the left and open on the right, except the last bin,
# which also includes the maximum.
quantiles <- function(covar, n_q) {
  cut_points <- quantile(covar, probs = seq(0, 1, 1/n_q), na.rm = TRUE)
  bin_id <- rep(NA, length(covar))
  for (k in 1:n_q) {
    lower <- cut_points[k]
    upper <- cut_points[k + 1]
    if (k == 1) {
      # First bin: everything strictly below the first interior cut point.
      bin_id[covar < upper] <- k
    } else if (k < n_q) {
      bin_id[covar >= lower & covar < upper] <- k
    }
    if (k == n_q) {
      # Last bin is closed on both sides so the maximum is not left out.
      bin_id[covar >= lower & covar <= upper] <- k
    }
  }
  bin_id
}

# Build the discretized copy of the analysis data set:
#  1. expand every categorical covariate in `catVars` into 0/1 indicator columns
#     (the treatment indicator is expanded too; its FALSE column is dropped),
#  2. replace the continuous covariates by their 20-quantile bin index.
CONS_C1_df_dup_SEP_2020_match_not_miss3 <- CONS_C1_df_dup_SEP_2020_match_not_miss2
# Iterate over the names directly: safe for an empty `catVars` (unlike
# 1:length()) and avoids a loop variable called `cat` that shadows base::cat.
for (cat_var in as.character(catVars)) {
  CONS_C1_df_dup_SEP_2020_match_not_miss3 <-
    columna_dummy(CONS_C1_df_dup_SEP_2020_match_not_miss3, cat_var)
}
CONS_C1_df_dup_SEP_2020_match_not_miss3$tipo_de_plan_res_FALSE <- NULL
for (num_var in c("edad_ini_cons", "edad_al_ing", "fech_ing_num")) {
  CONS_C1_df_dup_SEP_2020_match_not_miss3[[num_var]] <-
    quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3[[num_var]], 20)
}
# Covariate columns of the discretized data: everything except the columns at
# positions 1, 2 and 5.
# NOTE(review): positional exclusion is fragile -- confirm which columns sit at
# these positions.
match.on.sel2 <- names(CONS_C1_df_dup_SEP_2020_match_not_miss3)[-c(1, 2, 5)]
#"edad_ini_cons","edad_al_ing","fech_ing_num")

# Original data sorted with the treated rows first (decreasing treatment
# indicator).
CONS_SEP_match <- data.table::data.table(
  CONS_C1_df_dup_SEP_2020_match_not_miss2[
    order(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res, decreasing = TRUE),
  ]
)

# Discretized data arranged in the same `row` order as CONS_SEP_match.
CONS_SEP_match_dum <- data.table::data.table(
  CONS_C1_df_dup_SEP_2020_match_not_miss3 %>%
    dplyr::arrange(factor(row, levels = CONS_SEP_match$row))
)


Match

The matched variables were defined for the treatments at baseline (n=84,776).


library(designmatch)

#fine = list(covs = fine_covs)
#solver = list(name = name, t_max = t_max, approximate = 1, round_cplex = 0, trace_cplex = 0).
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
# 1. Gurobi installation

#For an exact solution, we strongly recommend running designmatch either with CPLEX or Gurobi.  Between these two solvers, the R interface of Gurobi is considerably easier to install.  Here we provide general instructions for manually installing Gurobi and its R interface in Mac and Windows machines.

#1. Create a free academic license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/creating_a_new_academic_li.html

#2. Install the software
#   2.1. In http://www.gurobi.com/index, go to Downloads > Gurobi Software
#   2.2. Choose your operating system and press download
#
#3. Retrieve and set up your Gurobi license
#   3.1. Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_and_setting_up_.html
#   3.2. Then follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_a_free_academic.html
#
#4. Test your license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/testing_your_license.html
#
#5. Install the R interface of Gurobi   
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/r_installing_the_r_package.html
#   * In Windows, in R run the command install.packages("PATH\\gurobi_7.X-Y.zip", repos=NULL) where path leads to the file gurobi_7.X-Y.zip (for example PATH=C:\\gurobi702\\win64\\R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#   * In MAC, in R run the command install.packages('PATH/gurobi_7.X-Y.tgz', repos=NULL) where path leads to the file gurobi_7.X-Y.tgz (for example PATH=/Library/gurobi702/mac64/R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#       
#6. Test the installation 
#   Load the library and run the examples therein
#   * A possible error that you may get is the following: "Error: package ‘slam’ required by ‘gurobi’ could not be found". If that case, install.packages('slam') and try again.
#   You should be all set!
# Recode the treatment column to numeric 0/1 (1 where the stored value equals "1").
CONS_SEP_match$tipo_de_plan_res <- ifelse(
  CONS_SEP_match$tipo_de_plan_res == "1", 1, 0
)

# ---- Solver options ----
require(slam)
# Available solvers: cplex, glpk, gurobi and symphony. The default solver is
# glpk with approximate = 1. For an exact solution, cplex or gurobi are strongly
# recommended as they are much faster than the other solvers, but they do
# require a license (free for academics, but not for people outside
# universities).
t_max <- 60 * 6
solver <- list(
  name = "gurobi",
  # Maximum time limit for finding the matches; within this limit a partial,
  # suboptimal solution is given.
  t_max = t_max,
  # approximate = 1 (the default) finds an approximate solution via a
  # relaxation of the original integer program.
  approximate = 1,
  round_cplex = 0,
  # Turns the optimizer output on.
  trace = 1
)

# Treatment indicator
t_ind <- ifelse(CONS_SEP_match$tipo_de_plan_res == "1", 1, 0)

#table(is.na(CONS_SEP_match$tipo_de_plan_res))

# Moment balance: constrain differences in means to be at most 0.1 standard
# deviations apart.
#   covs    - matrix where each column is a covariate whose mean is to be balanced
#   tols    - vector of tolerances for the maximum difference in means, one per
#             column of `covs`
#   targets - optional vector of target moments (e.g., means) of a distribution
#             to be approximated by matched sampling; if given, its length has
#             to equal the number of columns of `covs`
mom_covs <- cbind(
  CONS_SEP_match[["edad_al_ing"]],
  CONS_SEP_match[["fech_ing_num"]],
  CONS_SEP_match[["edad_ini_cons"]]
)
# Tolerance of .0999 standardized differences for every moment covariate.
mom_tols <- absstddif(mom_covs, t_ind, .0999)
mom <- list(covs = mom_covs, tols = mom_tols, targets = NULL)

# Mean balance (same three covariates as the moment constraints)
covs <- mom_covs
meantab(covs, t_ind)
##      Mis      Min      Max   Mean T   Mean C Std Dif P-val
## [1,]   0    14.88    88.84    35.99    35.99       0     1
## [2,]   0 13621.00 18199.00 16445.49 16445.49       0     1
## [3,]   0     5.00    74.00    16.51    16.51       0     1
# Fine balance
# `fine_covs` is a matrix where each column is a nominal covariate for fine
# balance; the columns are bound in the order listed below.
fine_covs <- do.call(
  cbind,
  lapply(
    c(
      "origen_ingreso_mod",
      "dg_cie_10_rec",
      "sexo_2",
      "sus_ini_mod_mvv",
      "tipo_centro_pub", # careful
      "estado_conyugal_2",
      "escolaridad_rec",
      "freq_cons_sus_prin",
      "nombre_region",
      "condicion_ocupacional_corr",
      # "evaluacindelprocesoteraputico" (from d_match_no_duplicates) left out
      "dg_trs_cons_sus_or"
    ),
    function(fine_var) CONS_SEP_match[[fine_var]]
  )
)
fine <- list(covs = fine_covs)

# 11,448; No. of controls: 11,448"
# 11,452; No. of controls: 11,452"
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#MATCH
# Run the cardinality matching and record the start time.
# The treatment indicator must be sorted, and the other vectors must follow the
# same order. `mom`, `fine` and `solver` are the lists defined above.
start.time <- Sys.time()
set.seed(2125)
out <- cardmatch(
  t_ind,
  mom = mom,
  fine = fine,
  solver = solver
)
##   Building the matching problem... 
##   Gurobi optimizer is open... 
##   Finding the optimal matches... 
## Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (win64)
## Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
## Optimize a model with 60 rows, 84776 columns and 1441192 nonzeros
## Model fingerprint: 0xb641b034
## Variable types: 0 continuous, 84776 integer (84776 binary)
## Coefficient statistics:
##   Matrix range     [1e+00, 2e+04]
##   Objective range  [1e+00, 1e+00]
##   Bounds range     [0e+00, 0e+00]
##   RHS range        [0e+00, 0e+00]
## Found heuristic solution: objective -0.0000000
## Presolve time: 1.91s
## Presolved: 60 rows, 84776 columns, 1440983 nonzeros
## Variable types: 0 continuous, 84776 integer (84776 binary)
## 
## Root relaxation: objective 1.144854e+04, 397 iterations, 0.78 seconds
## 
##     Nodes    |    Current Node    |     Objective Bounds      |     Work
##  Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time
## 
##      0     0 11448.5389    0   33   -0.00000 11448.5389      -     -    2s
## H    0     0                    3260.0000000 11448.5389   251%     -    7s
##      0     0 11448.5388    0   34 3260.00000 11448.5388   251%     -    7s
## H    0     0                    11448.000000 11448.5388  0.00%     -    8s
##      0     0 11448.5388    0   34 11448.0000 11448.5388  0.00%     -    8s
## 
## Cutting planes:
##   Zero half: 1
## 
## Explored 1 nodes (410 simplex iterations) in 8.85 seconds
## Thread count was 12 (of 12 available processors)
## 
## Solution count 3: 11448 3260 -0 
## 
## Optimal solution found (tolerance 1.00e-04)
## Best objective 1.144800000000e+04, best bound 1.144800000000e+04, gap 0.0000%
##   Optimal matches found
# Elapsed time of the matching step.
end.time <- Sys.time()
time.taken <- end.time - start.time
# Fine balance (note here we are getting an approximate solution)
#for (i in 1:ncol(fine_covs)) {     
#   print(finetab(fine_covs[, i], t_id_1, c_id_1))
#}
# Indices of the treated units and matched controls, plus the matched-group id,
# gathered in one data frame.
t_id_1 <- out$t_id
c_id_1 <- out$c_id
group <- out$group_id
ids_matched <- data.frame(t_id_1, c_id_1, group, check.names = FALSE)

# Report how many treated and control ids were returned by the matching.
paste0(
  "No. of treatments: ", formatC(table(table(t_id_1)), big.mark = ","),
  "; No. of controls: ", formatC(table(table(c_id_1)), big.mark = ",")
)
## [1] "No. of treatments: 11,448; No. of controls: 11,448"
# Fine balance (note here we are getting an approximate solution)
# The original loop called finetab() inside `for` without printing or storing
# the result, so every table was discarded. The tables are now kept in
# `finetab_by_cov` (one element per column of `fine_covs`) for inspection.
# `finetab_match1` is still created as an empty data frame, as before.
finetab_match1 <- data.frame()
finetab_by_cov <- lapply(
  seq_len(ncol(fine_covs)),
  function(j) finetab(fine_covs[, j], t_id_1, c_id_1)
)

# Matched sample: rows of CONS_SEP_match selected by position, treated units
# first and their matched controls after.
d_match <- CONS_SEP_match[c(t_id_1, c_id_1), ]

# Sanity check: count the rows whose `row` identifier occurs more than once in
# the matched sample.
paste0(
  "Number of duplicated rows: ",
  sum(d_match$row %in% d_match$row[duplicated(d_match$row)])
)
## [1] "Number of duplicated rows: 0"
#careful: the previous approach returned the same control more than once for a treated unit,
#which is why I will use the one below.
#THE FOLLOWING ONE IS WRONG BECAUSE THE IDS ARE POSITIONS, NOT VALUES THAT MATCH THE `row` NUMBER
#d_match_no_duplicates = CONS_SEP_match[which(CONS_SEP_match$row %in% c(t_id_1, c_id_1)), ]


Explore Results of the Matching


Age at Admission

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Age of Onset of Drug Use

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Date of Admission

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample


Love plot

Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


Balance

Table 5. Covariate Balance in the Variables of Interest
Unadjusted
Adjusted
Variables Nature of Variables SMDs Threshold Variance Ratios Threshold KS SMDs Threshold Variance Ratios Threshold KS
Public Center Binary -1.18 Not Balanced, >0.1 0.51 0.00 Balanced, <0.1 0.00
Occ.Status-Employed Binary -0.95 Not Balanced, >0.1 0.41 0.00 Balanced, <0.1 0.00
Freq Drug Cons-Daily Binary 0.72 Not Balanced, >0.1 0.34 0.00 Balanced, <0.1 0.00
Occ.Status- Unemployed Binary 0.62 Not Balanced, >0.1 0.30 0.00 Balanced, <0.1 0.00
Drug Dependence Binary 0.59 Not Balanced, >0.1 0.22 0.00 Balanced, <0.1 0.00
Freq Drug Cons-2-3d/wk Binary -0.52 Not Balanced, >0.1 0.21 0.00 Balanced, <0.1 0.00
Motive Admission-Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17 0.00 Balanced, <0.1 0.00
Occ.Status- No activity Binary 0.38 Not Balanced, >0.1 0.11 0.00 Balanced, <0.1 0.00
Starting Substance-Alcohol Binary -0.36 Not Balanced, >0.1 0.17 0.00 Balanced, <0.1 0.00
>1 treatment Binary 0.33 Not Balanced, >0.1 0.14 0.22 Not Balanced, >0.1 0.10
ICD-10-Wo/Psych Comorbidity Binary -0.32 Not Balanced, >0.1 0.15 0.00 Balanced, <0.1 0.00
Treatments (#) Contin. 0.31 Not Balanced, >0.1 1.91 Balanced, <2 0.14 0.19 Not Balanced, >0.1 1.41 Balanced, <2 0.10
Marital Status-Married/Shared liv. arr. Binary -0.30 Not Balanced, >0.1 0.13 0.00 Balanced, <0.1 0.00
Date of Admission Contin. -0.29 Not Balanced, >0.1 1.00 Balanced, <2 0.14 -0.10 Balanced, <0.1 0.93 Balanced, <2 0.06
Marital Status-Single Binary 0.28 Not Balanced, >0.1 0.14 0.00 Balanced, <0.1 0.00
Motive Admission-Spontaneous Binary -0.27 Not Balanced, >0.1 0.13 0.00 Balanced, <0.1 0.00
Freq Drug Cons-1d/wk or more Binary -0.25 Not Balanced, >0.1 0.05 0.00 Balanced, <0.1 0.00
ICD-10-W/Psych Comorbidity Binary 0.23 Not Balanced, >0.1 0.11 0.00 Balanced, <0.1 0.00
Starting Substance-Marijuana Binary 0.23 Not Balanced, >0.1 0.10 0.00 Balanced, <0.1 0.00
Region-Arica(15) Binary 0.21 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
Starting Substance-Cocaine paste Binary 0.20 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
Age at Admission Contin. -0.19 Not Balanced, >0.1 0.84 Balanced, <2 0.07 0.06 Balanced, <0.1 0.99 Balanced, <2 0.04
Freq Drug Cons-Less 1d/wk Binary -0.19 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Region-Tarapacá(01) Binary 0.16 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Sex-Women Binary 0.15 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
Occ.Status- Not seeking work Binary 0.15 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Motive Admission-Justice Sector Binary -0.13 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Educational Attainment-PS or less Binary 0.12 Not Balanced, >0.1 0.06 0.00 Balanced, <0.1 0.00
Region-Araucanía(09) Binary -0.12 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Magallanes(12) Binary -0.12 Not Balanced, >0.1 0.01 0.00 Balanced, <0.1 0.00
Freq Drug Cons-4-6d/wk Binary -0.11 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
Region-Antofagasta(02) Binary 0.11 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Coquimbo(04) Binary -0.10 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Aysén(11) Binary -0.09 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Ñuble(16) Binary -0.09 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Age of Onset of Drug Use Contin. -0.09 Balanced, <0.1 0.91 Balanced, <2 0.07 0.00 Balanced, <0.1 1.02 Balanced, <2 0.01
ICD-10-Dg. Unknown/under study Binary 0.08 Balanced, <0.1 0.03 0.00 Balanced, <0.1 0.00
Freq Drug Cons-Did not use Binary -0.08 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Educational Attainment-HS or less Binary -0.07 Balanced, <0.1 0.03 0.00 Balanced, <0.1 0.00
Educational Attainment-More than HS Binary -0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region- Biobío(08) Binary -0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Valparaíso(05) Binary 0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Los Lagos(10) Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Maule(07) Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Occ.Status-Inactive Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Motive Admission-Other Binary 0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Atacama(03) Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-O’Higgins(06) Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Motive Admission-Health Sector Binary -0.02 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Marital Status-Widower Binary -0.02 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Occ.Status-Looking 1st job Binary -0.02 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Starting Substance-Other Binary 0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Marital Status-Separated/Divorced Binary -0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Region-Los Ríos(14) Binary -0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Region-Metropolitana(13) Binary -0.01 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Starting Substance-Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Note. Unadjusted (n=84,776) ; Adjusted (n=22,896) ; Total pairs: 11,448


Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


We tolerated the remaining imbalance in fech_ing_num (SMD=0.11), because the date of admission did not necessarily have to be strictly balanced, assuming that not every user had to be admitted to treatment on exactly the same dates.

Survival Setting

Bivariate

# irrs(): rate ratio test of an event between the two levels of a grouping
# variable.
#   x  - name (string) of the grouping variable
#   y  - name (string) of the event variable being explained
#   z  - name (string) of the person-time variable (person days)
#   db - name (string) of the data set, looked up with get(); a data frame
#        itself is also accepted
# Returns the object produced by rateratio.test::rateratio.test(), where the
# second level of `x` is the first group (numerator) and the first level of `x`
# is the second group (denominator).
#
# The former version also ran the test a second time inside assign(), using the
# event counts as the person-time denominator; that result was stored in a
# function-local variable and thrown away, so it has been removed.
irrs <- function(x, y = "event", z = "person_days", db) {
  data <- if (is.character(db)) get(db) else db
  # Totals of events and of person-time per level of `x`, second level first.
  events_by_group <- as.numeric(
    xtabs(as.formula(paste0(y, "~", x)), data = data)
  )[c(2, 1)]
  time_by_group <- as.numeric(
    xtabs(as.formula(paste0(z, "~", x)), data = data)
  )[c(2, 1)]
  rateratio.test::rateratio.test(x = events_by_group, n = time_by_group)
}

#CONS_C1_df_dup_SEP_2020%>% 
#  dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% 
#  janitor::tabyl(condicion_ocupacional_corr)

# d_match %>% 
    #dplyr::group_by(row) %>% dplyr::mutate(rn_row=row_number()) %>% janitor::tabyl(rn_row)
#22,914

#
#d_match_surv %>% janitor::tabyl(duplicates_filtered,event)
#nrow(ids_matched)/2 =11,457

#CONS_SEP_match %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% janitor::tabyl(rn_hash)

# One row per treated id: keep only the first occurrence of each `t_id_1`.
ids_matched_filter<-
ids_matched %>% 
    dplyr::group_by(t_id_1) %>% 
    dplyr::mutate(rn_id=row_number()) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(rn_id==1)

# Pairs table: `row` identifiers of the treated units and of their matched
# controls next to the positional ids, plus `rn_id`/`group` from
# ids_matched_filter (its `c_id_1` column is dropped before the join).
# NOTE(review): the later steps refer to the columns as `row` and `row_2`, so
# presumably both "row" columns keep the name `row` when bound and
# clean_names() renames the second (control) one to `row_2` -- confirm.
ids_matched_rows<-cbind.data.frame("row_t"=CONS_SEP_match[c(t_id_1),"row"],
                        t_id_1,
                        "row_c"=CONS_SEP_match[c(c_id_1),"row"],
                        c_id_1) %>% 
  janitor::clean_names() %>% 
  dplyr::left_join(subset(ids_matched_filter,select=-c_id_1),by="t_id_1")

# Analysis data set for the incidence-rate comparisons: the matched sample with
# the survival variables, the matching identifiers and three derived indicators.
CONS_C1_df_dup_SEP_2020_irrs_health<-  
d_match %>% 
  # Bring in the time-to-event variables from the full data set.
  dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","dias_treat_imp_sin_na", "event", "person_days", "person_years","diff_bet_treat")],by="row") %>%
  # First pass: attach the pair information to rows whose `row` equals the
  # pairs table's `row` column (presumably the treated side -- see note above).
  dplyr::left_join(ids_matched_rows, by=c("row")) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,NA)) %>% 
  dplyr::select(-rn_id,-group) %>% #glimpse()
  dplyr::rename("row_c"="row_2") %>% 
  # Second pass: attach the pair information to rows whose `row` equals the
  # pairs table's `row_2` column (presumably the control side).
  dplyr::left_join(ids_matched_rows, by=c("row"="row_2")) %>% 
  # Merge the two passes: take the first-pass value when present, otherwise the
  # second-pass value.
  dplyr::mutate(t_id_1=ifelse(!is.na(t_id_1.x),t_id_1.x,t_id_1.y)) %>% 
  dplyr::mutate(c_id_1=ifelse(!is.na(c_id_1.x),c_id_1.x,c_id_1.y)) %>% 
  dplyr::mutate(row_c=ifelse(!is.na(row_c),row_c,row.y)) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,group_match)) %>% 

  # Drop the helper columns left over from the two joins.
  dplyr::select(-t_id_1.x,-c_id_1.x,-t_id_1.y,-c_id_1.y,-group,-row.y,-rn_id) %>% #glimpse()
  
  # res_drop_out: 1 when tipo_de_plan_res is 1 and abandono_temprano_rec is
  # TRUE, otherwise 0.
  dplyr::mutate(res_drop_out=dplyr::case_when(
  tipo_de_plan_res==1 & abandono_temprano_rec==TRUE ~1,
  TRUE~0)) %>% 
  # min_ach: 1 when the evaluation of the therapeutic process is
  # "3-Minimum Achievement", otherwise 0.
  dplyr::mutate(min_ach=dplyr::case_when(
  evaluacindelprocesoteraputico=="3-Minimum Achievement" ~1,
  TRUE~0)) %>% 
  dplyr::mutate(res_drop_out=factor(res_drop_out)) %>% 
    dplyr::mutate(min_ach=factor(min_ach)) %>% 
  # status_censorship: 1 when the discharge motive equals the literal below,
  # otherwise 0.
  # NOTE(review): "Ongoing treatmentt" ends in a double "t"; if the data value
  # is spelled "Ongoing treatment" this indicator is always 0 -- confirm.
  dplyr::mutate(status_censorship=dplyr::case_when(
  motivodeegreso_mod_imp=="Ongoing treatmentt" ~1,
  TRUE~0)) 
  
# CONS_C1_df_dup_SEP_2020_irrs_health%>% janitor::tabyl(cnt_diagnostico_trs_fisico_irr)
#label(CONS_C1_df_dup_SEP_2020_prev4_explore$dg_fis_anemia) <- "Physical Dg. Anemia"
#   cnt_mod_cie_10_or cnt_otros_probl_at_sm_or

#22,914
#d_match %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% nrow()

#27 Y ALGO
#CONS_C1_df_dup_SEP_2020_irrs_health %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% nrow()

# HAY UN SEGUNDO TRATAMIENTO PARA 4,565 CASOS
#PARA VER SI HAY MAS DE UN CASO POR USUARIO
#CONS_C1_df_dup_SEP_2020_irrs_health %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% janitor::tabyl(rn_hash)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

# Incidence-rate summaries (via the project helper `irrs`) for each binary
# exposure, all computed on the same matched dataset, which is passed by name.
irrs_early_drop <- irrs(
  db = "CONS_C1_df_dup_SEP_2020_irrs_health",
  x = "abandono_temprano_rec"
)
irrs_res_plan <- irrs(
  db = "CONS_C1_df_dup_SEP_2020_irrs_health",
  x = "tipo_de_plan_res"
)
irrs_res_early <- irrs(
  db = "CONS_C1_df_dup_SEP_2020_irrs_health",
  x = "res_drop_out"
)
irrs_min_ach <- irrs(
  db = "CONS_C1_df_dup_SEP_2020_irrs_health",
  x = "min_ach"
)


The incidence rate of readmission was 1.61 (95% CI 1.52-1.72) in users that had an early dropout, compared with users that did not have an early dropout at baseline (p < 0.001).


Figure 12. Cum. Hazards to Experience Readmission to SUD Treatment, by Early Dropout of Treatment at Baseline

Figure 12. Cum. Hazards to Experience Readmission to SUD Treatment, by Early Dropout of Treatment at Baseline


The incidence rate of readmission was 1.24 (95% IC 1.18-1.3) in users that had a residential plan, compared with users that had an ambulatory plan at baseline (p = 0.000).


Figure 13. Cum. Hazards to Experience Readmission to SUD Treatment, by Type of Plan at Baseline

Figure 13. Cum. Hazards to Experience Readmission to SUD Treatment, by Type of Plan at Baseline


The incidence rate of readmission was 1.66 (95% IC 1.54-1.79) in users that had a residential plan and an early dropout, compared with the rest of users at baseline (p = 0.000).


Figure 14. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person in a Residential Treatment with an Early Dropout

Figure 14. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person in a Residential Treatment with an Early Dropout


The incidence rate of readmission was 1.43 (95% IC 1.36-1.5) in users that had a minimum achievement of the therapeutic goals, compared with the rest of users at baseline (p = 0.000).


Figure 15. Cum. Hazards to Experience Readmission to SUD Treatment, by whether the person had a Minimum Achievement in Therapeutic Goals

Figure 15. Cum. Hazards to Experience Readmission to SUD Treatment, by whether the person had a Minimum Achievement in Therapeutic Goals


Multivariate


Inference for the regression coefficients is based on a within-pair treatment effect.


#The stratified Cox model can be used to perform Cox regression on matched designs by using stratification but it can also be done by modeling with frailties

#Some believe that accounting for the matching isn't necessary at all, since it doesn't affect beta coefficients materially and the variables which you have matched on can simply be adjusted for as covariates in the model; this is sufficient in most cases.

#A matched cohort study involves pairs (or clusters in case several untreated subjects are matched with each of the treated individuals) formed to include individuals who differ with respect to treatment but may be matched on certain baseline characteristics.

# Two common methods for analyzing paired/clustered survival data involve a stratified and a marginal Cox model, which represent 2 different approaches of accounting for potential correlation between paired outcomes (for discussion see Glidden and Vittinghoff [5]).

#A regression model is often a more powerful tool in detecting treatment effect than a matched study.

#Choices in study design are regression modeling or matched-pairs study.

#Brazauskas, R., & Logan, B. R. (2016). Observational Studies: Matching or Regression? Biology of Blood and Marrow Transplantation, 22(3), 557–563. doi:10.1016/j.bbmt.2015.12.005 



#simple expression of the common HR estimator would be a useful summary of exposure effect

#Shinozaki, T., Mansournia, M. A., & Matsuyama, Y. (2017). On hazard ratio estimators by proportional hazards models in matched-pair cohort studies. Emerging themes in epidemiology, 14, 6. https://doi.org/10.1186/s12982-017-0060-8

# "The covariate effects are so odd that we'll never model them correctly, so treat each combination as unique."The data set two needs to have each treated subject + their controls in a separate stratum - Terry Therneau

#Stratified approach
#For each pair, there is an unspecified baseline hazard function. The partial likelihood idea is readily adapted by multiplying the partial likelihoods specific to each stratum.
##Pros: Lack of structure. Cons: It does not provide any information about heterogeneity between pairs; Pairs in which both members shared the same covariate information or which provide only censoring observations do not contribute to the likelihood; this is because no between-pair comparisons are attempted. Heterogeneity is not described by a single parameter as frailty;

# Austin PC. A critical appraisal of propensity-score matching in the medical literature between 1996 and 2003. STATISTICS IN MEDICINE. Statist. Med. 2008; 27:2037–2049

#https://www.duo.uio.no/bitstream/handle/10852/10289/stat-res-11-97.pdf?sequence=1&isAllowed=y

# Request a larger memory ceiling before fitting the matched-pair survival models.
# NOTE(review): memory.limit() is Windows-specific and no longer has any effect in
# recent R releases — confirm it is still needed for the target R version.
memory.limit(size = 20000)
## [1] 20000
#Classical stratified tests

#This statistic reduces to the difference in the number of events in the 2 samples which occur while both patients in the pair are at risk, given the appropriate weight. 

#Klein, J. & Moeschberger, M. (2003) Survival Analysis: Statistical Methods for Censored and Truncated Data. 2nd Edition. Springer-Verlag. 

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Stratified Cox model: each matched group (group_match) gets its own baseline
# hazard, and the treatment setting (tipo_de_plan_res) is the only covariate.
m1 <- coxph(
  Surv(diff_bet_treat, event) ~ strata(group_match) + tipo_de_plan_res,
  data = CONS_C1_df_dup_SEP_2020_irrs_health
)

summary(m1)
## Call:
## coxph(formula = Surv(diff_bet_treat, event) ~ strata(group_match) + 
##     tipo_de_plan_res, data = CONS_C1_df_dup_SEP_2020_irrs_health)
## 
##   n= 6441, number of events= 6441 
##    (16455 observations deleted due to missingness)
## 
##                    coef exp(coef) se(coef)     z Pr(>|z|)    
## tipo_de_plan_res 0.3400    1.4049   0.0682 4.985 6.21e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## tipo_de_plan_res     1.405     0.7118     1.229     1.606
## 
## Concordance= 0.584  (se = 0.023 )
## Likelihood ratio test= 25.21  on 1 df,   p=5e-07
## Wald test            = 24.85  on 1 df,   p=6e-07
## Score (logrank) test = 25.09  on 1 df,   p=5e-07
# Test the proportional-hazards assumption of the stratified model m1.
cox.zph(m1)#Possibly, a log-normal or log-logistic AFT model would fit better than Cox.
##                  chisq df       p
## tipo_de_plan_res  11.5  1 0.00071
## GLOBAL            11.5  1 0.00071
# Parametric (Weibull) alternative to m1, fitted under a 60-second time limit
# through the project helper try_with_time_limit().
m1b <- try_with_time_limit(
  survreg(
    Surv(diff_bet_treat + 1, event) ~ strata(group_match) + tipo_de_plan_res,
    data = CONS_C1_df_dup_SEP_2020_irrs_health,
    dist = "weibull"
  ),
  elapsed = 60
)

#The survreg function in R does not allow time = 0. This is because for several of the distributions, including the lognormal distribution, having events occur at time = 0 will result in an undefined estimator.
(m1b)

# Cox model with a Gaussian frailty term per matched group instead of strata,
# evaluated through the project helper eval_fork() with a 60-second timeout.
m2 <- eval_fork(
  coxph(
    Surv(diff_bet_treat, event) ~
      frailty(
        group_match,
        distribution = "gaussian",
        sparse = FALSE,
        method = "reml"
      ) +
      tipo_de_plan_res,
    data = CONS_C1_df_dup_SEP_2020_irrs_health
  ),
  timeout = 60
)
summary(m2)

cox.zph(m2)
#CONS_C1_df_dup_SEP_2020$condicion_ocupacional_corr CONS_C1_df_dup_SEP_2020$cnt_diagnostico_trs_fisico CONS_C1_df_dup_SEP_2020$tenencia_de_la_vivienda_mod

##COx Diagnostics
#ggcoxzph(cox.zph(m1))
#ggcoxdiagnostics(m1, type = "dfbeta",
#                 linear.predictions = FALSE, ggtheme = theme_bw())
#ggcoxdiagnostics(m1, type = "deviance",
#                 linear.predictions = FALSE, ggtheme = theme_bw())
#It’s also possible to check outliers by visualizing the deviance residuals. The deviance residual is a normalized transform of the martingale residual. These residuals should be roughtly symmetrically distributed about zero with a standard deviation of 1.
#Positive values correspond to individuals that “died too soon” compared to expected survival times.
#Negative values correspond to individual that “lived too long”.
#Very large or small values are outliers, which are poorly predicted by the model.

#grid.arrange(
#  ggforest(m1, data=CONS_C1_df_dup_SEP_2020_irrs_health),
#  ggforest(m2, data=CONS_C1_df_dup_SEP_2020_irrs_health),
#  ncol=2
#)


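There was evidence against the proportional hazards assumption (test of Schoenfeld residuals, p < 0.001). Users in residential treatments experienced a 40% higher hazard of readmission within the study period than users in outpatient treatments (95% CI: 23% - 61%; p < 0.001).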


Multistate


#  dplyr::filter(motivodeegreso_mod_imp!="En curso")%>% #Sacar los tratamientos que estén en curso 


# Text labels for the sample-selection flowchart. Each label reports the number
# of rows (treatments) and of distinct users of one dataset, with thousands separators.

# Original C1 dataset
tab1_lab <- paste0(
  'Original C1 Dataset \n(n = ',
  formatC(nrow(CONS_C1), format='f', big.mark=',', digits=0),
  ';\nusers: ',
  formatC(CONS_C1 %>% dplyr::distinct(HASH_KEY) %>% nrow(), format='f', big.mark=',', digits=0),
  ')'
)

# C1 dataset after data wrangling / normalization
tab2_lab <- paste0(
  'C1 Dataset \n(n = ',
  formatC(nrow(CONS_C1_df_dup_SEP_2020), format='f', big.mark=',', digits=0),
  ';\nusers: ',
  formatC(CONS_C1_df_dup_SEP_2020 %>% dplyr::distinct(hash_key) %>% nrow(), format='f', big.mark=',', digits=0),
  ')'
)

# Reasons for exclusion during wrangling (bulleted, left-justified lines)
tab1_5_lab <- paste0(
  '&#8226; Duplicated entries\\l &#8226; Overlapping treatments of users\\l &#8226; Intermediate events of treatment (continuous referrals)'
)

# Rows kept after imputation
tab4_lab <- paste0(
  'Imputed C1 Dataset \n(n = ',
  formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados), format='f', big.mark=',', digits=0),
  ';\nusers: ',
  formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% dplyr::distinct(hash_key) %>% nrow(), format='f', big.mark=',', digits=0),
  ')'
)

# Rows discarded after imputation
tab3_5_lab <- paste0(
  'C1 Dataset \n(n = ',
  formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados), format='f', big.mark=',', digits=0),
  ';\nusers: ',
  formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados %>% dplyr::distinct(hash_key) %>% nrow(), format='f', big.mark=',', digits=0),
  ')'
)

# Imputed rows whose user appears in the matched sample
tab6_lab <- paste0(
  'C1 Matched Sample\nin Treatment Setting \n(n = ',
  formatC(
    CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>%
      dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>%
      nrow(),
    format='f', big.mark=',', digits=0
  ),
  ';\nusers: ',
  formatC(
    CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>%
      dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>%
      dplyr::distinct(hash_key) %>%
      nrow(),
    format='f', big.mark=',', digits=0
  ),
  ')'
)

# Edge label summarising the matching result (counts built from t_id_1 / c_id_1)
lab_tab <- paste0(
  "  Result of the matching on treatment setting\nNo. of treatments: ",
  table(table(t_id_1)) %>% formatC(big.mark = ","),
  "; No. of controls: ",
  table(table(c_id_1)) %>% formatC(big.mark = ",")
)

#https://stackoverflow.com/questions/46750364/diagrammer-and-graphviz
#https://mikeyharper.uk/flowcharts-in-r-using-diagrammer/
#http://blog.nguyenvq.com/blog/2012/05/29/better-decision-tree-graphics-for-rpart-via-party-and-partykit/
#http://blog.nguyenvq.com/blog/2014/01/17/skeleton-to-create-fast-automatic-tree-diagrams-using-r-and-graphviz/
#https://cran.r-project.org/web/packages/DiagrammeR/vignettes/graphviz-mermaid.html
#https://stackoverflow.com/questions/39133058/how-to-use-graphviz-graphs-in-diagrammer-for-r
#https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781789802566/1/ch01lvl1sec21/creating-diagrams-via-the-diagrammer-package
#https://justlegal.be/2019/05/using-flowcharts-to-display-legal-procedures/
# paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
#
library(DiagrammeR) #⋉
# Sample-selection flowchart. The '@@n' placeholders inside the DOT source are
# replaced by the expressions listed in the '[n]:' footnotes at the end of the
# string (tab1_lab, tab2_lab, ...). The 'blank*' nodes are near-invisible
# junctions used to branch an edge towards the exclusion boxes (tab3, tab5, tab7).
# Fixed the misspelled tab7 label ("Mathing" / "basline").
grViz("digraph flowchart {
      # node definitions with substituted label text
      node [fontname = Times, shape = rectangle,fontsize = 9]        
      tab1 [label = '@@1']
      tab2 [label = '@@2']
      tab3 [label = '&#8226;Duplicated entries\\l&#8226;Intermediate events of treatment (continuous referrals)\\l',fontsize = 7]
      tab4 [label = '@@4']
      blank [label = '', width = 0.0001, height = 0.0001]
      blank2 [label = '', width = 0.0001, height = 0.0001]
      blank3 [label = '', width = 0.0001, height = 0.0001]
      tab5 [label = '&#8226;Logically Inconsistent candidates for imputation\\l&#8226;Ties in candidates for imputation\\l',fontsize = 7]
      tab6 [label= '@@6']
      tab7 [label = '&#8226;Matching pairs based on balance of covariates at baseline,\\l&#8226;Pairs 1:1\\l',fontsize = 7]
      
      
      # edge definitions with the node IDs
      tab1 -> blank [arrowhead = none,label='  Data wrangling and normalization process',fontsize = 8];
      blank -> tab3
      blank -> tab2
      tab2 -> blank2 [arrowhead = none];
      blank2 -> tab5 
      blank2 -> tab4 [label='  Result of the imputation of missing values',fontsize = 8];
      tab4 -> blank3 [arrowhead= none];
      blank3-> tab7
      blank3 -> tab6 [label='@@7',fontsize = 8];
            subgraph {
              rank = same; tab3; blank;
            }
            subgraph {
              rank = same; tab5; blank2;
            }
            subgraph {
              rank = same; tab7; blank3;
            }
      }

      [1]:  tab1_lab
      [2]:  tab2_lab
      [3]:  tab1_5_lab
      [4]:  tab4_lab
      [5]:  ''
      [6]:  tab6_lab
      [7]:  lab_tab
      ")
#      {rank=same; 'tab2'' -> tab3 [label='',fontsize = 11]}; #⋉
#CONS_C1_df_dup_SEP_2020_irrs_health
Table 6. Summary descriptives table
0 1 p.overall
N=17193 N=15137
Motive of Admission to Treatment (First Entry): <0.001
Spontaneous 7005 (40.7%) 5569 (36.8%)
Assisted Referral 2948 (17.1%) 3104 (20.5%)
Other 959 (5.58%) 918 (6.06%)
Justice Sector 1230 (7.15%) 980 (6.47%)
Health Sector 5051 (29.4%) 4566 (30.2%)
Psychiatric Comorbidity: <0.001
Without psychiatric comorbidity 4728 (27.5%) 3854 (25.5%)
Diagnosis unknown (under study) 3522 (20.5%) 3310 (21.9%)
With psychiatric comorbidity 8943 (52.0%) 7973 (52.7%)
Sexo Usuario/Sex of User: 0.066
Men 11462 (66.7%) 10238 (67.6%)
Women 5731 (33.3%) 4899 (32.4%)
Age at Admission to Treatment 32.7 [26.7;40.7] 33.1 [26.9;41.0] 0.032
Treatment Length (>90): <0.001
FALSE 14081 (81.9%) 12096 (79.9%)
TRUE 3112 (18.1%) 3035 (20.1%)
‘Missing’ 0 (0.00%) 6 (0.04%)
Treatments by User (#): 0.016
1 8783 (51.1%) 7672 (50.7%)
2 4728 (27.5%) 4102 (27.1%)
3 2192 (12.7%) 1921 (12.7%)
4 934 (5.43%) 842 (5.56%)
5 329 (1.91%) 356 (2.35%)
6 163 (0.95%) 155 (1.02%)
7 46 (0.27%) 59 (0.39%)
8 18 (0.10%) 30 (0.20%)
More than one treatment: 0.479
0 8783 (51.1%) 7672 (50.7%)
1 8410 (48.9%) 7465 (49.3%)
Starting Substance: <0.001
Alcohol 7477 (43.5%) 6220 (41.1%)
Cocaine hydrochloride 747 (4.34%) 642 (4.24%)
Cocaine paste 2496 (14.5%) 2369 (15.7%)
Marijuana 6051 (35.2%) 5525 (36.5%)
Other 422 (2.45%) 381 (2.52%)
Marital Status: <0.001
Married/Shared living arrangements 4285 (24.9%) 3470 (22.9%)
Separated/Divorced 1879 (10.9%) 1571 (10.4%)
Single 10842 (63.1%) 9948 (65.7%)
Widower 187 (1.09%) 148 (0.98%)
Educational Attainment: 0.004
3-Completed primary school or less 5253 (30.6%) 4855 (32.1%)
2-Completed high school or less 8924 (51.9%) 7780 (51.4%)
1-More than high school 3016 (17.5%) 2502 (16.5%)
Frequency of use of primary drug: <0.001
1 day a week or more 598 (3.48%) 328 (2.17%)
2 to 3 days a week 2514 (14.6%) 1573 (10.4%)
4 to 6 days a week 2453 (14.3%) 1969 (13.0%)
Daily 10901 (63.4%) 11007 (72.7%)
Did not use 337 (1.96%) 106 (0.70%)
Less than 1 day a week 390 (2.27%) 154 (1.02%)
Public Center: <0.001
FALSE 9908 (57.6%) 10763 (71.1%)
TRUE 7285 (42.4%) 4374 (28.9%)
Minimum Achievement in the Therapeutic Process: <0.001
Ongoing treatment 1113 (6.47%) 664 (4.39%)
Minimum achievement 8491 (49.4%) 6193 (40.9%)
High/Medium achievement 7589 (44.1%) 8280 (54.7%)
Drug Dependence: <0.001
FALSE 2086 (12.1%) 1318 (8.71%)
TRUE 15107 (87.9%) 13819 (91.3%)
Age of Onset of Drug Use 15.0 [14.0;17.0] 15.0 [13.0;17.0] 0.021
Occupational Status: <0.001
Employed 3833 (22.3%) 2015 (13.3%)
Inactive 1888 (11.0%) 1537 (10.2%)
Looking for a job for the first time 32 (0.19%) 23 (0.15%)
No activity 1865 (10.8%) 2141 (14.1%)
Not seeking for work 351 (2.04%) 400 (2.64%)
Unemployed 9224 (53.6%) 9021 (59.6%)
Days of Treatment (missing dates of discharge were replaced with difference from 2019-11-13) 154 [84.0;278] 150 [66.0;277] <0.001
Users with Posterior Treatments (=1): 0.479
0 8783 (51.1%) 7672 (50.7%)
1 8410 (48.9%) 7465 (49.3%)
User’s Days available in the system for the study 417 [147;1200] 401 [151;1094] <0.001
User’s Years available in the system for the study 1.14 [0.40;3.29] 1.10 [0.41;3.00] <0.001
Days of difference between the Next Treatment 353 [139;794] 262 [72.0;687] <0.001
Treatment Successful Completion: <0.001
Ongoing treatment 1113 (6.47%) 664 (4.39%)
Completion 3151 (18.3%) 4375 (28.9%)
Non-completion 12929 (75.2%) 10098 (66.7%)
Early Drop-out & Residential Plan (=1): 0.000
0 17193 (100%) 12102 (79.9%)
1 0 (0.00%) 3035 (20.1%)
Cause of Discharge: <0.001
Administrative discharge 1493 (8.68%) 1901 (12.6%)
Early Drop-out 3112 (18.1%) 3035 (20.1%)
Late Drop-out 6110 (35.5%) 2971 (19.6%)
Ongoing treatment 1113 (6.47%) 664 (4.39%)
Referral to another treatment 2214 (12.9%) 2191 (14.5%)
Therapeutic discharge 3151 (18.3%) 4375 (28.9%)
Note. Variables of C1 dataset had to be standardized before comparison;
Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;
Categorical variables are presented as number (%)


After matching, we selected 32,330 treatments (users=22,896).


#NOT DUPLICATES
#d_match_surv %>% 
#    dplyr::group_by(hash_key) %>% 
#    dplyr::mutate(dis_hash=n_distinct(n_hash)) %>% 
#    dplyr::ungroup() %>% 
#    dplyr::filter(dis_hash>1)

# Reproducible random subsample: collapse the matched data to one row per
# n_hash, draw 1000 of those rows, and keep every record whose n_hash was drawn.
set.seed(2125)
random__users <- sample_n(
  summarise(dplyr::group_by(d_match_surv, n_hash)),
  1000
)

d_match_surv_sub_sample <- dplyr::filter(
  d_match_surv,
  n_hash %in% unlist(random__users)
)

library(frailtySurv)
memory.limit(size = 20000)
#frailty_1<-
#fitfrail(Surv(dias_treat_imp_sin_na,event)~ tipo_de_plan_res+ cluster(hash_key),d_match_surv,frailty="gamma")

# Cox model of treatment duration on treatment setting with a Gaussian frailty
# per user (hash_key); the fit is abandoned if it runs past the timeout.
m3 <- R.utils::withTimeout(
  coxph(
    Surv(dias_treat_imp_sin_na, event) ~
      tipo_de_plan_res +
      frailty(
        hash_key,
        distribution = "gaussian",
        sparse = FALSE,
        method = "reml"
      ),
    data = d_match_surv
  ),
  timeout = 6000
)
summary(m3)

#cox.zph(m3)#Possibly, a log-normal or log-logistic AFT model would fit better than Cox.


#https://rdrr.io/rforge/Epi/man/Lexis.html

# Draft call that builds a Lexis (follow-up) object from d_match_surv, with
# hash_key as the person id; the result is not assigned.
# NOTE(review): the argument list looks copied from the function signature —
# `states` is passed as a bare positional symbol and entry, exit and duration
# are all supplied at once. Confirm this call runs as intended before relying on it.
Epi::Lexis(entry=0, exit=fech_egres_num, duration=dias_treat_imp_sin_na, entry.status = 0, exit.status = 0, id=hash_key, data=d_match_surv, merge=TRUE, states, notes=TRUE, tol=.Machine$double.eps^0.5, keep.dropped=FALSE )
library("mstate")
# Columns carried (together with hash_key, dup and fech_ing_num) into the
# long-format multistate dataset built below.
vector_surv_msm<- c("tr_completion","diff_bet_treat","dias_treat_imp_sin_na","min_achievement","abandono_temprano_rec","tipo_de_plan_res")
# dias_treat_imp_sin_na 

#NECESITO CREAR UNA BASE DE DATOS CON LA ENTRADA A UN TRATAMIENTO COMO UN EVENTO INDEPENDIENTE, EN UNA FILA SEPARADA DEL TÉRMINO DE TRATAMIENTO EN LONG.

# Build the long-format multistate dataset `d_match_surv_msm`.
# Every treatment row of d_match_surv is split into two event rows: admission
# (events_within_tr == 1) and discharge (events_within_tr == 2), each with its
# time in years since the user's first admission and a numeric state code (1-8).
d_match_surv %>% 
    dplyr::select(c("hash_key","dup","fech_ing_num",vector_surv_msm)) %>% 
    dplyr::arrange(hash_key,fech_ing_num) %>% #glimpse()
    dplyr::group_by(hash_key) %>% 
    # years in study at admission, relative to the user's earliest admission
    dplyr::mutate(years_1=(fech_ing_num-min(fech_ing_num))/365.25) %>% 
    # years in study at discharge = admission time + treatment length
    dplyr::mutate(years_2=dias_treat_imp_sin_na/365.25+years_1) %>% 
    dplyr::ungroup() %>% #glimpse()
    # one row per event: "years_1" -> events_within_tr "1", "years_2" -> "2"
    tidyr::pivot_longer(
     cols = starts_with("years_"),
     names_to = "events_within_tr",
     names_prefix = "years_",
     values_to = "years_in_study",
     values_drop_na = TRUE
    ) %>% 
  dplyr::select(-fech_ing_num,-diff_bet_treat) %>% 
  # events within a treatment (1 = admission, 2 = discharge)
  dplyr::mutate(events_within_tr=as.numeric(events_within_tr)) %>% 
  # running order of events within each user
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(event_num=row_number()) %>% 
  dplyr::ungroup() %>%
  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  # rows matching none of the conditions below get state NA
  dplyr::mutate(state=dplyr::case_when(
    #1) Enter to a Residential Treatment (transient)
    events_within_tr==1 & tipo_de_plan_res==1~1,
    #2) Complete a Residential Treatment (transient)
    tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==1~2,
    #3) Not Complete a Residential Treatment (transient)
    tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==1~3,
    #4) Enter to an Outpatient Treatment (transient)
    events_within_tr==1 & tipo_de_plan_res==0~4,
    #5) Complete an Outpatient Treatment (transient)
    tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==0~5,
    #6) Not Complete an Outpatient Treatment (transient)
    tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==0~6,
    #7) Censored Residential
    grepl("Ongoing",tr_completion) & tipo_de_plan_res==1 & events_within_tr==2~7,
    #8) Censored Outpatient
    grepl("Ongoing",tr_completion) & tipo_de_plan_res==0 & events_within_tr==2~8
    #tr_completion=="Ongoing treatment" & events_within_tr==2 & tipo_de_plan_res==0~9,
    #tr_completion=="Ongoing treatment" & events_within_tr==2 & tipo_de_plan_res==1~10
    )) %>% 
    dplyr::rename("PTNUM"="hash_key") %>% 
  # filter out unfinished treatments - should not be filtered, because the time at risk would be lost
  #dplyr::mutate(censored=dplyr::case_when(tr_completion=="Ongoing treatment" & events_within_tr==2~1,TRUE~0)) %>% 
  #dplyr::filter(censored==0) %>% 
  #janitor::tabyl(min_achievement,tr_completion)
  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  #interval censoring. Continuous events should be differenced with the previous year in study with the minimum (0.001)
  #check if ordered
  dplyr::group_by(PTNUM) %>% 
  # time since the user's previous event (NA for the first event of each user).
  # dplyr::lag is called explicitly: an unqualified lag() can resolve to
  # stats::lag, which does not shift a plain vector and would yield all-zero differences.
  dplyr::mutate(diff_bet_events=years_in_study-dplyr::lag(years_in_study)) %>% 
  dplyr::ungroup() %>% 
  #there are some values that were generated that still have minimum and arbitrary differences between events. We converted into 0.
  # years_in_study_corr is computed from the already-updated diff_bet_events:
  # events with a null (or tiny) gap are pushed forward by 0.00001 years
  dplyr::mutate(diff_bet_events=dplyr::case_when(diff_bet_events<=.00001~0,
                                                 TRUE~diff_bet_events),
                years_in_study_corr=dplyr::case_when(diff_bet_events<=0~years_in_study+.00001,
                                                     TRUE~years_in_study)) %>% 
  dplyr::mutate(diff_bet_events=dplyr::case_when(diff_bet_events==0~.00001,
                                                 TRUE~diff_bet_events)) %>% 
  assign("d_match_surv_msm",.,envir=.GlobalEnv)


# Variable labels for the long-format multistate dataset.
attr(d_match_surv_msm$years_in_study,"label") <- "Years in study"
attr(d_match_surv_msm$years_in_study_corr,"label") <- "Years in study (corrected for interval censored events)"
# events_within_tr is 1 for the admission row and 2 for the discharge row of a
# treatment; it previously carried the copy-pasted label "Years in study".
attr(d_match_surv_msm$events_within_tr,"label") <- "Event within treatment (1=admission, 2=discharge)"
attr(d_match_surv_msm$event_num,"label") <- "Order of events (includes events at admission and at discharge, separately)"
# the state variable is coded 1 to 8 (7 and 8 are the censored/ongoing states)
attr(d_match_surv_msm$state,"label") <- "State (1 to 8)"

  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  # 284 cases that changed once imputed the null differences with the posterior event in time
# Number of event rows whose time was shifted by the null-difference correction
paste0(
  with(
    d_match_surv_msm,
    sum(years_in_study != years_in_study_corr, na.rm = TRUE)
  ),
  " cases that changed once imputed the null differences with the posterior event in time (were corrected)"
)
## [1] "284 cases that changed once imputed the null differences with the posterior event in time (were corrected)"
  # 0 cases with negative differences with the posterior event in time (not possible)
# Sanity check: number of event rows that precede the user's previous event
paste0(
  with(d_match_surv_msm, sum(diff_bet_events < 0, na.rm = TRUE)),
  " cases with negative differences with the posterior event in time (not possible)"
)
## [1] "0 cases with negative differences with the posterior event in time (not possible)"
  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  # Export wide format
# Reshape the long multistate data to one row per user (PTNUM), with columns
# state_<k>, years_in_study_corr_<k> and min_achievement_<k> for the k-th event.
d_match_surv_msm %>% 
  dplyr::select(PTNUM,event_num,state,years_in_study_corr,min_achievement) %>% 
  # cast to character so the three variables can share one `value` column
  dplyr::mutate_at(vars(c("state","min_achievement","years_in_study_corr")),~as.character(.)) %>% 
    tidyr::pivot_longer(
     cols = -c("PTNUM","event_num"),
     names_to = "varswide",
     # spelled out as FALSE: the shorthand `F` is an ordinary, reassignable variable
     values_drop_na = FALSE
    ) %>% 
    tidyr::pivot_wider(
                       names_from=c(varswide,event_num),
                       names_glue ="{varswide}_{event_num}",
                       values_from=value
                       ) %>% 
  # restore numeric types for the state and time columns
  dplyr::mutate_at(vars(starts_with("state_")),~as.numeric(.)) %>% 
  dplyr::mutate_at(vars(starts_with("years_in_study_corr_")),~as.numeric(.)) %>% 
assign("d_match_surv_msm_wide",.,envir=.GlobalEnv)

# Attach a "label" attribute to every per-event column of the wide dataset and
# export it to Stata format.
# - `[[` is used so the attribute lands on the column vector itself; with
#   `tbl[, col]` on a tibble the attribute is set on a temporary one-column
#   tibble and is lost when it is assigned back.
# - The number of events is taken from the state_<k> columns actually present
#   instead of a hard-coded 1:16, which fails when fewer columns exist.
for (i in seq_len(sum(grepl("^state_[0-9]+$", names(d_match_surv_msm_wide))))) {
  attr(d_match_surv_msm_wide[[paste0("state_", i)]], "label") <-
    paste0("State (1 to 8) at s=", i)

  attr(d_match_surv_msm_wide[[paste0("years_in_study_corr_", i)]], "label") <-
    paste0("Years in study (corrected for interval censored events) at s=", i)

  attr(d_match_surv_msm_wide[[paste0("min_achievement_", i)]], "label") <-
    paste0("Achievement in the Therapeutic Process at s=", i)
}

# NOTE(review): absolute, machine-specific output path — confirm before running elsewhere.
rio::export(d_match_surv_msm_wide,"G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/d_match_surv_msm_wide.dta")

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Q matrix
#_#_#_#_#_#_
#_#_#_#_#_#_
library(msm)
#If an instantaneous transition is not allowed from state  to state , then Q should have  entry 0, otherwise it should be non-zero. The diagonal entries are ignored.
#entries define transition/movement probabilities of subjects through states
#probability of being in state j at time t + s, conditional on being in state i at time s
# in time homogeneous intensities the dependence of P on time will be omitted ---> no debiese ser mi caso
# each row must sum to one for an example row 1 summation equals to 1
# transition intensity matrix Q
 # Counts of consecutive state pairs per subject (statetable.msm), stored as a plain 6 x 8 matrix.
 # NOTE(review): the 6 x 8 shape is hard-coded — presumably 6 "from" states by 8 "to"
 # states; verify against the table. A non-square matrix cannot be passed as a
 # transition-intensity matrix.
 qmatrix<-matrix(statetable.msm(state, PTNUM, data = d_match_surv_msm),nrow=6,ncol=8)
 
# Table 7: observed transitions between consecutive states, most frequent first,
# with the share of each transition over all observed transitions.
msm::statetable.msm(state, PTNUM, data = d_match_surv_msm) %>% 
    data.table::data.table() %>% 
    # keep only transitions that were actually observed
    dplyr::filter(N>0) %>% 
    dplyr::arrange(desc(N)) %>% 
    # format the share directly; rounding the proportion to one decimal first
    # collapsed every percentage to 0%, 10%, 20% or 30%
    dplyr::mutate("%"=scales::percent(N/sum(N), accuracy=0.1)) %>% 
    # replace the numeric state codes with descriptive names
    dplyr::mutate(from=dplyr::case_when(from==1~"Enter to a Residential Treatment(1)",
                                      from==2~"Complete a Residential Treatment(2)",
                                      from==3~"Not Complete a Residential Treatment(3)",
                                      from==4~"Enter to an Outpatient Treatment(4)",
                                      from==5~"Complete an Outpatient Treatment(5)",
                                      from==6~"Not Complete an Outpatient Treatment(6)",
                                      from==7~"Ongoing treatment (Residential)(Censored)(7)",
                                      from==8~"Ongoing treatment (Outpatient)(Censored)(8)")) %>% 
    dplyr::mutate(to=dplyr::case_when(to==1~"Enter to a Residential Treatment(1)",
                                      to==2~"Complete a Residential Treatment(2)",
                                      to==3~"Not Complete a Residential Treatment(3)",
                                      to==4~"Enter to an Outpatient Treatment(4)",
                                      to==5~"Complete an Outpatient Treatment(5)",
                                      to==6~"Not Complete an Outpatient Treatment(6)",
                                      to==7~"Ongoing treatment(Residential)(Censored)(7)",
                                      to==8~"Ongoing treatment(Outpatient)(Censored)(8)")) %>%   
  knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 7. State Transition Matrix",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 8) %>% 
  kableExtra::add_footnote("Note= Excluded transitions that were not found")
Table 7. State Transition Matrix
from to N %
Enter to an Outpatient Treatment(4) Not Complete an Outpatient Treatment(6) 12,929 30%
Enter to a Residential Treatment(1) Not Complete a Residential Treatment(3) 10,098 20%
Enter to a Residential Treatment(1) Complete a Residential Treatment(2) 4,375 10%
Enter to an Outpatient Treatment(4) Complete an Outpatient Treatment(5) 3,151 10%
Not Complete an Outpatient Treatment(6) Enter to an Outpatient Treatment(4) 2,482 10%
Not Complete a Residential Treatment(3) Enter to an Outpatient Treatment(4) 1,972 0%
Not Complete a Residential Treatment(3) Enter to a Residential Treatment(1) 1,700 0%
Enter to an Outpatient Treatment(4) Ongoing treatment(Outpatient)(Censored)(8) 1,113 0%
Not Complete an Outpatient Treatment(6) Enter to a Residential Treatment(1) 1,112 0%
Complete a Residential Treatment(2) Enter to an Outpatient Treatment(4) 794 0%
Complete a Residential Treatment(2) Enter to a Residential Treatment(1) 707 0%
Enter to a Residential Treatment(1) Ongoing treatment(Residential)(Censored)(7) 664 0%
Complete an Outpatient Treatment(5) Enter to an Outpatient Treatment(4) 497 0%
Complete an Outpatient Treatment(5) Enter to a Residential Treatment(1) 170 0%
a Note= Excluded transitions that were not found


We restricted the transitions following several criteria: cannot enter/complete/non-complete treatment two sequential times (e.g. 1 -> 4), or enter to a residential/outpatient treatment and complete an outpatient/residential (e.g. 4->2).


#msm2Surv(d_match_surv_msm, "PTNUM", "years_in_study_corr", "state", "min_achievement", Q=qmatrix)
 #Error in rowSums(Qf) : 'x' must be an array of at least two dimensions

#
#All other off-diagonal positions contain an initial value for the corresponding transition intensity.
##this matrix should usually only allow transitions between adjacent states
# Allowed instantaneous transitions between the six non-censored states
# (rows = from, columns = to): 1 marks an allowed transition, 0 a forbidden one.
# Built by starting from an all-zero matrix and switching on the allowed pairs.
msm_mat_cens_allowed <- matrix(
  0,
  nrow = 6, ncol = 6,
  dimnames = list(from = 1:6, to = 1:6)
)
msm_mat_cens_allowed[rbind(
  c(1, 2), c(1, 3),  # from state 1
  c(2, 1), c(2, 4),  # from state 2
  c(3, 1), c(3, 4),  # from state 3
  c(4, 5), c(4, 6),  # from state 4
  c(5, 1), c(5, 4),  # from state 5
  c(6, 1), c(6, 4)   # from state 6
)] <- 1
# If exacttimes is set to TRUE, then the observation times are assumed to represent the exact times of transition of the process. 
# numerical overflow in calculating likelihood --> this was corrected with "control argument"
#msm_model<- msm(state ~ years_in_study_corr, subject = PTNUM, data = d_match_surv_msm, qmatrix = msm_mat, gen.inits=T,
#                control=list(fnscale=4000,reltol = 1e-16))
#If using gen.inits=TRUE then the non-zero entries in the qmatrix can be anything you like (conventionally 1).
#obstype= A vector specifying the observation scheme for each row of the data
# Multistate model on the corrected event times. States 7 and 8 are declared as
# censoring codes: an observation coded 7 may be state 2 or 3, and one coded 8
# may be state 5 or 6. Initial intensities are generated from the data
# (gen.inits); `TRUE` is spelled out because the shorthand `T` is reassignable.
msm_model_cens <- msm(
  state ~ years_in_study_corr,
  subject = PTNUM,
  data = d_match_surv_msm,
  qmatrix = msm_mat_cens_allowed,
  gen.inits = TRUE,
  control = list(fnscale = 4000, reltol = 1e-16),
  censor = c(7, 8),
  censor.states = list(c(2, 3), c(5, 6)),
  obstype = 1
)
# with obstype=2: Error in if (any(q2 < 0)) stop("off-diagonal entries of qmatrix should not be negative") : 


# Display the fitted model: baseline transition intensities with their
# interval bounds, followed by the -2 * log-likelihood.
print(msm_model_cens)
## 
## Call:
## msm(formula = state ~ years_in_study_corr, subject = PTNUM, data = d_match_surv_msm,     qmatrix = msm_mat_cens_allowed, gen.inits = T, obstype = 1,     censor = c(7, 8), censor.states = list(c(2, 3), c(5, 6)),     control = list(fnscale = 4000, reltol = 1e-16))
## 
## Maximum likelihood estimates
## 
## Transition intensities
##       Baseline                          
## 1 - 1 -1.015e+05 (-1.344e+05,-7.672e+04)
## 1 - 2  4.302e+00 ( 4.060e+00, 4.559e+00)
## 1 - 3  1.015e+05 ( 7.672e+04, 1.344e+05)
## 2 - 1  1.676e+00 ( 1.536e+00, 1.829e+00)
## 2 - 2 -2.095e+00 (-2.248e+00,-1.952e+00)
## 2 - 4  4.185e-01 ( 3.847e-01, 4.553e-01)
## 3 - 1  3.476e+04 ( 2.620e+04, 4.612e+04)
## 3 - 3 -3.476e+04 (-4.612e+04,-2.620e+04)
## 3 - 4  2.352e-01 ( 2.183e-01, 2.534e-01)
## 4 - 4 -3.814e+01 (-4.237e+01,-3.432e+01)
## 4 - 5  1.622e+00 ( 1.535e+00, 1.714e+00)
## 4 - 6  3.651e+01 ( 3.273e+01, 4.074e+01)
## 5 - 1  1.492e-01 ( 1.221e-01, 1.824e-01)
## 5 - 4  1.113e+00 ( 1.015e+00, 1.220e+00)
## 5 - 5 -1.262e+00 (-1.367e+00,-1.165e+00)
## 6 - 1  1.360e-01 ( 1.250e-01, 1.479e-01)
## 6 - 4  1.353e+01 ( 1.206e+01, 1.519e+01)
## 6 - 6 -1.367e+01 (-1.532e+01,-1.219e+01)
## 
## -2 * log-likelihood:  105309.3 
## [Note, to obtain old print format, use "printold.msm"]
# Summarise the fit: the result holds observed and expected state
# prevalences (counts and percentages) over a grid of follow-up times.
summary(msm_model_cens)
## $prevalences
## $prevalences$Observed
##                  State 1 State 2 State 3 State 4 State 5 State 6 Total
## 0                  11448       0       0   11448       0       0 22896
## 1.13456536618754    1737     605    1371    2203     226    1416  7558
## 2.26913073237509     439     523     945     777     273    1113  4070
## 3.40369609856263     243     370     581     485     207     775  2661
## 4.53826146475017     142     239     378     337     153     543  1792
## 5.67282683093771     105     141     224     237      95     324  1126
## 6.80739219712526      71      80     114     147      56     200   668
## 7.9419575633128       38      35      58      92      31      81   335
## 9.07652292950034      26      10      16      50      10      23   135
## 10.2110882956879       1       1       1       9       1       1    14
## 11.3456536618754       0       0       0       0       0       1     1
## 
## $prevalences$Expected
##                             1            2            3            4
## 0                1.144800e+04    0.0000000    0.0000000 1.144800e+04
## 1.13456536618754 5.507960e+02 1144.7232249 1608.7974953 9.096285e+02
## 2.26913073237509 2.609781e+02  560.8006875  762.2782802 5.120675e+02
## 3.40369609856263 1.577277e+02  334.7420216  460.6990023 3.482639e+02
## 4.53826146475017 1.006609e+02  211.4313471  294.0151406 2.407900e+02
## 5.67282683093771 6.099871e+01  127.1817921  178.1679056 1.538756e+02
## 6.80739219712526 3.532605e+01   73.2810327  103.1819627 9.227591e+01
## 7.9419575633128  1.743725e+01   36.0484196   50.9315056 4.659634e+01
## 9.07652292950034 6.954515e+00   14.3445533   20.3130638 1.886089e+01
## 10.2110882956879 7.163637e-01    1.4753808    2.0923874 1.961514e+00
## 11.3456536618754 5.094559e-02    0.1048221    0.1488042 1.403648e-01
##                            5            6 Total
## 0                  0.0000000    0.0000000 22896
## 1.13456536618754 917.7239636 2426.3308309  7558
## 2.26913073237509 610.0405158 1363.8349355  4070
## 3.40369609856263 431.2089908  928.3583122  2661
## 4.53826146475017 302.7492239  642.3534090  1792
## 5.67282683093771 195.0773001  410.6987281  1126
## 6.80739219712526 117.5688428  246.3662066   668
## 7.9419575633128   59.5541915  124.4322981   335
## 9.07652292950034  24.1537210   50.3732554   135
## 10.2110882956879   2.5151486    5.2392053    14
## 11.3456536618754   0.1801289    0.3749344     1
## 
## $prevalences$`Observed percentages`
##                    State 1   State 2   State 3  State 4  State 5    State 6
## 0                50.000000  0.000000  0.000000 50.00000 0.000000   0.000000
## 1.13456536618754 22.982270  8.004763 18.139720 29.14792 2.990209  18.735115
## 2.26913073237509 10.786241 12.850123 23.218673 19.09091 6.707617  27.346437
## 3.40369609856263  9.131905 13.904547 21.833897 18.22623 7.779030  29.124389
## 4.53826146475017  7.924107 13.337054 21.093750 18.80580 8.537946  30.301339
## 5.67282683093771  9.325044 12.522202 19.893428 21.04796 8.436945  28.774423
## 6.80739219712526 10.628743 11.976048 17.065868 22.00599 8.383234  29.940120
## 7.9419575633128  11.343284 10.447761 17.313433 27.46269 9.253731  24.179104
## 9.07652292950034 19.259259  7.407407 11.851852 37.03704 7.407407  17.037037
## 10.2110882956879  7.142857  7.142857  7.142857 64.28571 7.142857   7.142857
## 11.3456536618754  0.000000  0.000000  0.000000  0.00000 0.000000 100.000000
## 
## $prevalences$`Expected percentages`
##                          1        2        3        4        5        6
## 0                50.000000  0.00000  0.00000 50.00000  0.00000  0.00000
## 1.13456536618754  7.287589 15.14585 21.28602 12.03531 12.14242 32.10282
## 2.26913073237509  6.412237 13.77889 18.72920 12.58151 14.98871 33.50946
## 3.40369609856263  5.927386 12.57956 17.31300 13.08771 16.20477 34.88757
## 4.53826146475017  5.617236 11.79862 16.40709 13.43694 16.89449 35.84561
## 5.67282683093771  5.417292 11.29501 15.82308 13.66568 17.32480 36.47413
## 6.80739219712526  5.288331 10.97021 15.44640 13.81376 17.60013 36.88117
## 7.9419575633128   5.205148 10.76072 15.20343 13.90936 17.77737 37.14397
## 9.07652292950034  5.151492 10.62560 15.04671 13.97103 17.89165 37.31352
## 10.2110882956879  5.116883 10.53843 14.94562 14.01082 17.96535 37.42290
## 11.3456536618754  5.094559 10.48221 14.88042 14.03648 18.01289 37.49344
## 
## 
## $hazard
## NULL
## 
## $hazard.scale
## NULL
## 
## attr(,"class")
## [1] "summary.msm"
# If censor is a vector with more than one element, this should be a list, with each element a vector corresponding to the equivalent element of censor.
#1: In msm.form.cmodel(censor, censor.states, qmatrix) : some censoring indicators are the same as actual states
#2: In msm.form.cmodel(censor, censor.states, qmatrix) : some censoring indicators are the same as actual states


#HOW TO SET UP THE DATASET SO THAT ONGOING TREATMENT IS TREATED AS CENSORING AND TREATMENT COMPLETION AS AN IMPORTANT EVENT.
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#MSTATEs
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#See data structure:
#View(ebmt4)

# Author's note kept as a character string that is evaluated and discarded
# through invisible(); it does not affect the analysis.
# Approximate English translation of the Spanish text: "I have 2 problems with
# this application: looking at ebmt, its cases are formatted so that each
# column is a time to a given state. If that state is reached, the time is
# shorter than the person-time and it comes with a 0."
invisible(c("tengo 2 problemas con esta aplicación: si veo ebmt, tiene formateado los casos en que cada columns es un tiempo a un determinado estado. Si llego a ese estado, entonces es un tiempo menor al person time y viene con un 0"))
 #file:///G:/Mi%20unidad/Alvacast/Srv%20Analysis/(Use%20R!)%20Frans%20Willekens%20-%20Multistate%20Analysis%20of%20Life%20Histories%20with%20R-Springer%20(2014).pdf
#In other words, the package requires one record for each potential transition. 

library("mstate")
#https://rdrr.io/cran/mstate/f/inst/doc/Tutorial.pdf 

#The first step in a multi-state model analysis is to set up the transition matrix. The transition
#matrix specifies which direct transitions are possible (those with NA are impossible) and assigns
#numbers to the transitions for future reference.
# NA/1 version of the allowed-transition matrix: NA marks an impossible
# direct transition, 1 marks an allowed one. Rows are origin states ("from"),
# columns are destination states ("to").
# NOTE(review): this reassigns msm_mat_cens_allowed, replacing the 0/1 matrix
# that was passed to msm() earlier in the file, and every allowed cell holds 1
# rather than a distinct transition number -- confirm this object is still
# needed alongside the transMat() result.
msm_mat_cens_allowed <- rbind(
  c(NA, 1, 1, NA, NA, NA),   # from 1: to 2 or 3
  c(1, NA, NA, 1, NA, NA),   # from 2: to 1 or 4
  c(1, NA, NA, 1, NA, NA),   # from 3: to 1 or 4
  c(NA, NA, NA, NA, 1, 1),   # from 4: to 5 or 6
  c(1, NA, NA, 1, NA, NA),   # from 5: to 1 or 4
  c(1, NA, NA, 1, NA, NA)    # from 6: to 1 or 4
)
dimnames(msm_mat_cens_allowed) <- list(from = 1:6, to = 1:6)
library("mstate")
# Build the mstate transition matrix: element i of `x` lists the states that
# can be reached directly from state i, and `names` labels the six states
# (R = residential, O = outpatient; Comp = complete, N-Comp = not complete).
tmat <- mstate::transMat(
  x = list(
    c(2, 3),  # from "Enter R"
    c(1, 4),  # from "Comp R"
    c(1, 4),  # from "N-Comp R"
    c(5, 6),  # from "Enter O"
    c(1, 4),  # from "Comp O"
    c(1, 4)   # from "N-Comp O"
  ),
  names = c(
    "Enter R", "Comp R", "N-Comp R",
    "Enter O", "Comp O", "N-Comp O"
  )
)
# Show the resulting transition matrix.
print(tmat)
#1~"Enter to a Residential Treatment(1)",
#from==2~"Complete a Residential Treatment(2)",
#from==3~"Not Complete a Residential Treatment(3)",
#from==4~"Enter to an Outpatient Treatment(4)",
#from==5~"Complete an Outpatient Treatment(5)",
#from==6~"Not Complete an Outpatient Treatment(6)",
#from==7~"Ongoing treatment (Residential)(Censored)(7)",
#from==8~"Ongoing treatment (Outpatient)(Censored)(8)"

Session Info

# Show the value of the R_LIBS_USER environment variable.
Sys.getenv("R_LIBS_USER")
## [1] "C:/Users/CISS Fondecyt/OneDrive/Documentos/R/win-library/4.0"
# Report the RStudio source editor context (document id, path, contents
# and current selection).
# NOTE(review): presumably requires a running RStudio session -- confirm
# before knitting from the command line.
rstudioapi::getSourceEditorContext()
## Document Context: 
## - id:        '1EB5C54F'
## - path:      'G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/SUD_CL/Matching_Process_NOV.Rmd'
## - contents:  <3963 rows>
## Document Selection:
## - [3883, 1] -- [3883, 1]: ''
# Save the whole workspace to an .RData file.
# NOTE(review): the destination is a hard-coded absolute path on the G: drive;
# confirm it exists before running on another machine.
save.image("G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state.RData")

# Record the R version, platform, locale and attached/loaded package
# versions used to produce this document.
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252  LC_CTYPE=Spanish_Chile.1252   
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Chile.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] msm_1.6.8               mstate_0.2.12           lubridate_1.7.9        
##  [4] Amelia_1.7.6            Rcpp_1.0.5              polycor_0.7-10         
##  [7] compareGroups_4.4.5     DiagrammeR_1.0.6.1.9000 gurobi_9.1-0           
## [10] radiant.update_1.4.1    eha_2.8.1               cobalt_4.2.3           
## [13] sensitivityfull_1.5.6   sensitivity2x2xk_1.01   MatchIt_3.0.2          
## [16] tableone_0.12.0         stargazer_5.2.2         reshape2_1.4.4         
## [19] exactRankTests_0.8-31   gridExtra_2.3           foreign_0.8-80         
## [22] glpkAPI_1.3.2           designmatch_0.3.1       Rglpk_0.6-4            
## [25] slam_0.1-47             MASS_7.3-51.6           survMisc_0.5.5         
## [28] ggfortify_0.4.10        rateratio.test_1.0-2    survminer_0.4.8        
## [31] ggpubr_0.4.0            epiR_1.0-15             forcats_0.5.0          
## [34] purrr_0.3.4             readr_1.3.1             tibble_3.0.3           
## [37] tidyverse_1.3.0         treemapify_2.5.3        ggiraph_0.7.0          
## [40] chilemapas_0.2          sf_0.9-3                finalfit_1.0.1         
## [43] lsmeans_2.30-0          emmeans_1.4.8           choroplethrAdmin1_1.1.1
## [46] choroplethrMaps_1.0.1   choroplethr_3.6.3       acs_2.1.4              
## [49] XML_3.99-0.3            RColorBrewer_1.1-2      panelr_0.7.3           
## [52] lme4_1.1-23             Matrix_1.2-18           dplyr_1.0.1            
## [55] data.table_1.13.0       codebook_0.9.2          devtools_2.3.0         
## [58] usethis_1.6.1           sqldf_0.4-11            RSQLite_2.2.0          
## [61] gsubfn_0.7              proto_1.0.0             broom_0.7.0            
## [64] zoo_1.8-8               altair_4.0.1            rbokeh_0.5.1           
## [67] janitor_2.0.1           plotly_4.9.2.1          kableExtra_1.1.0       
## [70] Hmisc_4.4-0             Formula_1.2-3           survival_3.1-12        
## [73] lattice_0.20-41         ggplot2_3.3.2           stringr_1.4.0          
## [76] stringi_1.4.6           tidyr_1.1.1             knitr_1.29             
## [79] matrixStats_0.56.0      boot_1.3-25            
## 
## loaded via a namespace (and not attached):
##   [1] class_7.3-17        ps_1.3.3            rprojroot_1.3-2    
##   [4] crayon_1.3.4        V8_3.1.0            nlme_3.1-148       
##   [7] backports_1.1.7     reprex_0.3.0        ggcorrplot_0.1.3   
##  [10] rlang_0.4.7         readxl_1.3.1        performance_0.4.8  
##  [13] nloptr_1.2.2.2      callr_3.4.3         flextable_0.5.10   
##  [16] rjson_0.2.20        ggmap_3.0.0         bit64_0.9-7        
##  [19] glue_1.4.1          sjPlot_2.8.4        parallel_4.0.2     
##  [22] processx_3.4.3      classInt_0.4-3      tcltk_4.0.2        
##  [25] haven_2.3.1         tidyselect_1.1.0    km.ci_0.5-2        
##  [28] rio_0.5.16          sjmisc_2.8.5        chron_2.3-55       
##  [31] xtable_1.8-4        magrittr_1.5        evaluate_0.14      
##  [34] gdtools_0.2.2       RgoogleMaps_1.4.5.3 cli_2.0.2          
##  [37] rstudioapi_0.11     sp_1.4-2            rpart_4.1-15       
##  [40] jtools_2.0.5        sjlabelled_1.1.6    RJSONIO_1.3-1.4    
##  [43] maps_3.3.0          gistr_0.5.0         xfun_0.16          
##  [46] parameters_0.8.2    pkgbuild_1.1.0      cluster_2.1.0      
##  [49] ggfittext_0.9.0     expm_0.999-4        png_0.1-7          
##  [52] withr_2.2.0         bitops_1.0-6        plyr_1.8.6         
##  [55] cellranger_1.1.0    e1071_1.7-3         survey_4.0         
##  [58] coda_0.19-3         pillar_1.4.6        multcomp_1.4-13    
##  [61] fs_1.5.0            vctrs_0.3.2         ellipsis_0.3.1     
##  [64] generics_0.0.2      rgdal_1.5-8         tools_4.0.2        
##  [67] munsell_0.5.0       compiler_4.0.2      pkgload_1.1.0      
##  [70] abind_1.4-5         tigris_0.9.4        sessioninfo_1.1.1  
##  [73] visNetwork_2.0.9    jsonlite_1.7.0      WDI_2.6.0          
##  [76] scales_1.1.1        carData_3.0-4       estimability_1.3   
##  [79] lazyeval_0.2.2      car_3.0-8           latticeExtra_0.6-29
##  [82] reticulate_1.16     effectsize_0.3.2    checkmate_2.0.0    
##  [85] rmarkdown_2.5       openxlsx_4.1.5      sandwich_2.5-1     
##  [88] statmod_1.4.34      webshot_0.5.2       pander_0.6.3       
##  [91] yaml_2.2.1          systemfonts_0.2.3   htmltools_0.5.0    
##  [94] memoise_1.1.0       viridisLite_0.3.0   jsonvalidate_1.1.0 
##  [97] digest_0.6.25       assertthat_0.2.1    rappdirs_0.3.1     
## [100] repr_1.1.0          bayestestR_0.7.2    BiasedUrn_1.07     
## [103] KMsurv_0.1-5        units_0.6-6         remotes_2.2.0      
## [106] blob_1.2.1          splines_4.0.2       labeling_0.3       
## [109] hms_0.5.3           rmapshaper_0.4.4    modelr_0.1.8       
## [112] colorspace_1.4-1    base64enc_0.1-3     nnet_7.3-14        
## [115] mvtnorm_1.1-1       fansi_0.4.1         truncnorm_1.0-8    
## [118] R6_2.4.1            grid_4.0.2          crul_0.9.0         
## [121] lifecycle_0.2.0     acepack_1.4.1       labelled_2.5.0     
## [124] zip_2.1.1           writexl_1.3         curl_4.3           
## [127] geojsonlint_0.4.0   ggsignif_0.6.0      pryr_0.1.4         
## [130] minqa_1.2.4         testthat_2.3.2      snakecase_0.11.0   
## [133] desc_1.2.0          TH.data_1.0-10      htmlwidgets_1.5.1  
## [136] officer_0.3.13      crosstalk_1.1.0.1   rvest_0.3.6        
## [139] insight_0.9.0       htmlTable_2.0.1     codetools_0.2-16   
## [142] prettyunits_1.1.1   dbplyr_1.4.4        vegawidget_0.3.1   
## [145] gtable_0.3.0        DBI_1.1.0           httr_1.4.2         
## [148] highr_0.8           KernSmooth_2.23-17  farver_2.0.3       
## [151] uuid_0.1-4          hexbin_1.28.1       mice_3.11.0        
## [154] xml2_1.3.2          ggeffects_0.15.1    bit_1.1-15.2       
## [157] sjstats_0.18.0      jpeg_0.1-8.1        pkgconfig_2.0.3    
## [160] maptools_1.0-1      rstatix_0.6.0       mitools_2.4        
## [163] HardyWeinberg_1.6.6 Rsolnp_1.16         httpcode_0.3.0